<!DOCTYPE html>
<!-- saved from url=(0042)https://zybuluo.com/hanbingtao/note/581764 -->
<html class="theme theme-white"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
    
    
    <meta name="description" content="Cmd Markdown 编辑阅读器，支持实时同步预览，区分写作和阅读模式，支持在线存储，分享文稿网址。">
    <meta name="author" content="Jiawei Zhang">

    <meta name="viewport" content="width=device-width, initial-scale=1.0">

    
    <title>零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器</title>


    <link href="https://zybuluo.com/static/img/favicon.png" type="image/x-icon" rel="icon">

    <link href="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/1bc053c8.base.lib.min.css" rel="stylesheet" media="screen">


    
    <!-- id="prettify-style" will be used to get the link element below and change href to change prettify code, so it can't be in beginmin/endmin block. -->
    <link id="prettify-style" href="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/prettify-cmd.css" type="text/css" rel="stylesheet">
    <!--
    <link id="mermaid-style" href="https://zybuluo.com/static/editor/libs/mermaid/mermaid.forest.css" type="text/css" rel="stylesheet">
    -->
    <link href="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/45c7d56d.layout.min.css" rel="stylesheet" media="screen">


    

    <script async src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/analytics.js"></script><script>
      // Standard Google Analytics (analytics.js) bootstrap snippet.
      // Creates the global `ga` command queue on window, records the load
      // timestamp, and injects the analytics.js loader <script> before the
      // first <script> element already on the page.
      (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
      m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
      // Explicit https: URL instead of the protocol-relative "//" form, which
      // resolves to file:// (and fails) when this saved page is opened locally.
      })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

      ga('create', 'UA-44461741-1', 'zybuluo.com'); // tracking ID + cookie domain
      ga('send', 'pageview');                       // record the initial page view
    </script>
<style type="text/css">
/* MathJax: highlight frame and navigation arrows shown around a hovered expression. */
.MathJax_Hover_Frame {border-radius: .25em; -webkit-border-radius: .25em; -moz-border-radius: .25em; -khtml-border-radius: .25em; box-shadow: 0px 0px 15px #83A; -webkit-box-shadow: 0px 0px 15px #83A; -moz-box-shadow: 0px 0px 15px #83A; -khtml-box-shadow: 0px 0px 15px #83A; border: 1px solid #A6D ! important; display: inline-block; position: absolute}
.MathJax_Hover_Arrow {position: absolute; width: 15px; height: 11px; cursor: pointer}
</style><style type="text/css">
/* MathJax contextual menu: "About" dialog and the right-click menu widgets. */
#MathJax_About {position: fixed; left: 50%; width: auto; text-align: center; border: 3px outset; padding: 1em 2em; background-color: #DDDDDD; color: black; cursor: default; font-family: message-box; font-size: 120%; font-style: normal; text-indent: 0; text-transform: none; line-height: normal; letter-spacing: normal; word-spacing: normal; word-wrap: normal; white-space: nowrap; float: none; z-index: 201; border-radius: 15px; -webkit-border-radius: 15px; -moz-border-radius: 15px; -khtml-border-radius: 15px; box-shadow: 0px 10px 20px #808080; -webkit-box-shadow: 0px 10px 20px #808080; -moz-box-shadow: 0px 10px 20px #808080; -khtml-box-shadow: 0px 10px 20px #808080; filter: progid:DXImageTransform.Microsoft.dropshadow(OffX=2, OffY=2, Color='gray', Positive='true')}
.MathJax_Menu {position: absolute; background-color: white; color: black; width: auto; padding: 5px 0px; border: 1px solid #CCCCCC; margin: 0; cursor: default; font: menu; text-align: left; text-indent: 0; text-transform: none; line-height: normal; letter-spacing: normal; word-spacing: normal; word-wrap: normal; white-space: nowrap; float: none; z-index: 201; border-radius: 5px; -webkit-border-radius: 5px; -moz-border-radius: 5px; -khtml-border-radius: 5px; box-shadow: 0px 10px 20px #808080; -webkit-box-shadow: 0px 10px 20px #808080; -moz-box-shadow: 0px 10px 20px #808080; -khtml-box-shadow: 0px 10px 20px #808080; filter: progid:DXImageTransform.Microsoft.dropshadow(OffX=2, OffY=2, Color='gray', Positive='true')}
.MathJax_MenuItem {padding: 1px 2em; background: transparent}
.MathJax_MenuArrow {position: absolute; right: .5em; color: #666666}
.MathJax_MenuActive .MathJax_MenuArrow {color: white}
.MathJax_MenuCheck {position: absolute; left: .7em}
.MathJax_MenuRadioCheck {position: absolute; left: .7em}
.MathJax_MenuLabel {padding: 1px 2em 3px 1.33em; font-style: italic}
.MathJax_MenuRule {border-top: 1px solid #DDDDDD; margin: 4px 3px}
.MathJax_MenuDisabled {color: GrayText}
.MathJax_MenuActive {background-color: #606872; color: white}
.MathJax_Menu_Close {position: absolute; width: 31px; height: 31px; top: -15px; left: -15px}
</style><style type="text/css">
/* MathJax zoom popup: the enlarged-equation box and its click-capture overlays. */
#MathJax_Zoom {position: absolute; background-color: #F0F0F0; overflow: auto; display: block; z-index: 301; padding: .5em; border: 1px solid black; margin: 0; font-weight: normal; font-style: normal; text-align: left; text-indent: 0; text-transform: none; line-height: normal; letter-spacing: normal; word-spacing: normal; word-wrap: normal; white-space: nowrap; float: none; box-shadow: 5px 5px 15px #AAAAAA; -webkit-box-shadow: 5px 5px 15px #AAAAAA; -moz-box-shadow: 5px 5px 15px #AAAAAA; -khtml-box-shadow: 5px 5px 15px #AAAAAA; filter: progid:DXImageTransform.Microsoft.dropshadow(OffX=2, OffY=2, Color='gray', Positive='true')}
#MathJax_ZoomOverlay {position: absolute; left: 0; top: 0; z-index: 300; display: inline-block; width: 100%; height: 100%; border: 0; padding: 0; margin: 0; background-color: white; opacity: 0; filter: alpha(opacity=0)}
#MathJax_ZoomFrame {position: relative; display: inline-block; height: 0; width: 0}
#MathJax_ZoomEventTrap {position: absolute; left: 0; top: 0; z-index: 302; display: inline-block; border: 0; padding: 0; margin: 0; background-color: white; opacity: 0; filter: alpha(opacity=0)}
</style><style type="text/css">
/* MathJax core: pre-typeset preview, status message bar, and error text. */
.MathJax_Preview {color: #888}
#MathJax_Message {position: fixed; left: 1em; bottom: 1.5em; background-color: #E6E6E6; border: 1px solid #959595; margin: 0px; padding: 2px 8px; z-index: 102; color: black; font-size: 80%; width: auto; white-space: nowrap}
#MathJax_MSIE_Frame {position: absolute; top: 0; left: 0; width: 0px; z-index: 101; border: 0px; margin: 0px; padding: 0px}
.MathJax_Error {color: #CC0000; font-style: italic}
</style><style type="text/css">
/* MathJax SVG output renderer: display-math container, tooltips, and processing states. */
.MathJax_SVG_Display {text-align: center; margin: 1em 0em; position: relative; display: block; width: 100%}
#MathJax_SVG_Tooltip {background-color: InfoBackground; color: InfoText; border: 1px solid black; box-shadow: 2px 2px 5px #AAAAAA; -webkit-box-shadow: 2px 2px 5px #AAAAAA; -moz-box-shadow: 2px 2px 5px #AAAAAA; -khtml-box-shadow: 2px 2px 5px #AAAAAA; padding: 3px 4px; z-index: 401; position: absolute; left: 0; top: 0; width: auto; height: auto; display: none}
.MathJax_SVG {display: inline; font-style: normal; font-weight: normal; line-height: normal; font-size: 100%; font-size-adjust: none; text-indent: 0; text-align: left; text-transform: none; letter-spacing: normal; word-spacing: normal; word-wrap: normal; white-space: nowrap; float: none; direction: ltr; border: 0; padding: 0; margin: 0}
.MathJax_SVG * {transition: none; -webkit-transition: none; -moz-transition: none; -ms-transition: none; -o-transition: none}
.mjx-svg-href {fill: blue; stroke: blue}
.MathJax_SVG_Processing {visibility: hidden; position: absolute; top: 0; left: 0; width: 0; height: 0; overflow: hidden; display: block}
.MathJax_SVG_Processed {display: none !important}
/* NOTE(review): the original rule contained an invalid empty "vertical-align: ;"
   declaration (dropped by CSS parsers anyway); it has been removed. */
.MathJax_SVG .noError {font-size: 90%; text-align: left; color: black; padding: 1px 3px; border: 1px solid}
</style></head>

<body class="theme  theme-white pace-done"><div style="visibility: hidden; overflow: hidden; position: absolute; top: 0px; height: 1px; width: auto; padding: 0px; border: 0px; margin: 0px; text-align: left; text-indent: 0px; text-transform: none; line-height: normal; letter-spacing: normal; word-spacing: normal;"><div id="MathJax_SVG_Hidden"></div><svg><defs id="MathJax_SVG_glyphs"><path id="MJMAIN-28" stroke-width="1" d="M94 250Q94 319 104 381T127 488T164 576T202 643T244 695T277 729T302 750H315H319Q333 750 333 741Q333 738 316 720T275 667T226 581T184 443T167 250T184 58T225 -81T274 -167T316 -220T333 -241Q333 -250 318 -250H315H302L274 -226Q180 -141 137 -14T94 250Z"></path><path id="MJMAIN-31" stroke-width="1" d="M213 578L200 573Q186 568 160 563T102 556H83V602H102Q149 604 189 617T245 641T273 663Q275 666 285 666Q294 666 302 660V361L303 61Q310 54 315 52T339 48T401 46H427V0H416Q395 3 257 3Q121 3 100 0H88V46H114Q136 46 152 46T177 47T193 50T201 52T207 57T213 61V578Z"></path><path id="MJMAIN-29" stroke-width="1" d="M60 749L64 750Q69 750 74 750H86L114 726Q208 641 251 514T294 250Q294 182 284 119T261 12T224 -76T186 -143T145 -194T113 -227T90 -246Q87 -249 86 -250H74Q66 -250 63 -250T58 -247T55 -238Q56 -237 66 -225Q221 -64 221 250T66 725Q56 737 55 738Q55 746 60 749Z"></path><path id="MJMATHI-3B4" stroke-width="1" d="M195 609Q195 656 227 686T302 717Q319 716 351 709T407 697T433 690Q451 682 451 662Q451 644 438 628T403 612Q382 612 348 641T288 671T249 657T235 628Q235 584 334 463Q401 379 401 292Q401 169 340 80T205 -10H198Q127 -10 83 36T36 153Q36 286 151 382Q191 413 252 434Q252 435 245 449T230 481T214 521T201 566T195 609ZM112 130Q112 83 136 55T204 27Q233 27 256 51T291 111T309 178T316 232Q316 267 309 298T295 344T269 400L259 396Q215 381 183 342T137 256T118 179T112 130Z"></path><path id="MJMATHI-54" stroke-width="1" d="M40 437Q21 437 21 445Q21 450 37 501T71 602L88 651Q93 669 101 677H569H659Q691 677 697 676T704 667Q704 661 687 553T668 444Q668 437 649 437Q640 437 637 437T631 442L629 445Q629 
451 635 490T641 551Q641 586 628 604T573 629Q568 630 515 631Q469 631 457 630T439 622Q438 621 368 343T298 60Q298 48 386 46Q418 46 427 45T436 36Q436 31 433 22Q429 4 424 1L422 0Q419 0 415 0Q410 0 363 1T228 2Q99 2 64 0H49Q43 6 43 9T45 27Q49 40 55 46H83H94Q174 46 189 55Q190 56 191 56Q196 59 201 76T241 233Q258 301 269 344Q339 619 339 625Q339 630 310 630H279Q212 630 191 624Q146 614 121 583T67 467Q60 445 57 441T43 437H40Z"></path><path id="MJMATHI-6B" stroke-width="1" d="M121 647Q121 657 125 670T137 683Q138 683 209 688T282 694Q294 694 294 686Q294 679 244 477Q194 279 194 272Q213 282 223 291Q247 309 292 354T362 415Q402 442 438 442Q468 442 485 423T503 369Q503 344 496 327T477 302T456 291T438 288Q418 288 406 299T394 328Q394 353 410 369T442 390L458 393Q446 405 434 405H430Q398 402 367 380T294 316T228 255Q230 254 243 252T267 246T293 238T320 224T342 206T359 180T365 147Q365 130 360 106T354 66Q354 26 381 26Q429 26 459 145Q461 153 479 153H483Q499 153 499 144Q499 139 496 130Q455 -11 378 -11Q333 -11 305 15T277 90Q277 108 280 121T283 145Q283 167 269 183T234 206T200 217T182 220H180Q168 178 159 139T145 81T136 44T129 20T122 7T111 -2Q98 -11 83 -11Q66 -11 57 -1T48 16Q48 26 85 176T158 471L195 616Q196 629 188 632T149 637H144Q134 637 131 637T124 640T121 647Z"></path><path id="MJMAIN-3D" stroke-width="1" d="M56 347Q56 360 70 367H707Q722 359 722 347Q722 336 708 328L390 327H72Q56 332 56 347ZM56 153Q56 168 72 173H708Q722 163 722 153Q722 140 707 133H70Q56 140 56 153Z"></path><path id="MJMATHI-74" stroke-width="1" d="M26 385Q19 392 19 395Q19 399 22 411T27 425Q29 430 36 430T87 431H140L159 511Q162 522 166 540T173 566T179 586T187 603T197 615T211 624T229 626Q247 625 254 615T261 596Q261 589 252 549T232 470L222 433Q222 431 272 431H323Q330 424 330 420Q330 398 317 385H210L174 240Q135 80 135 68Q135 26 162 26Q197 26 230 60T283 144Q285 150 288 151T303 153H307Q322 153 322 145Q322 142 319 133Q314 117 301 95T267 48T216 6T155 -11Q125 -11 98 4T59 56Q57 64 57 83V101L92 241Q127 382 128 383Q128 385 77 
385H26Z"></path><path id="MJSZ2-220F" stroke-width="1" d="M220 812Q220 813 218 819T214 829T208 840T199 853T185 866T166 878T140 887T107 893T66 896H56V950H1221V896H1211Q1080 896 1058 812V-311Q1076 -396 1211 -396H1221V-450H725V-396H735Q864 -396 888 -314Q889 -312 889 -311V896H388V292L389 -311Q405 -396 542 -396H552V-450H56V-396H66Q195 -396 219 -314Q220 -312 220 -311V812Z"></path><path id="MJMATHI-69" stroke-width="1" d="M184 600Q184 624 203 642T247 661Q265 661 277 649T290 619Q290 596 270 577T226 557Q211 557 198 567T184 600ZM21 287Q21 295 30 318T54 369T98 420T158 442Q197 442 223 419T250 357Q250 340 236 301T196 196T154 83Q149 61 149 51Q149 26 166 26Q175 26 185 29T208 43T235 78T260 137Q263 149 265 151T282 153Q302 153 302 143Q302 135 293 112T268 61T223 11T161 -11Q129 -11 102 10T74 74Q74 91 79 106T122 220Q160 321 166 341T173 380Q173 404 156 404H154Q124 404 99 371T61 287Q60 286 59 284T58 281T56 279T53 278T49 278T41 278H27Q21 284 21 287Z"></path><path id="MJMAIN-2212" stroke-width="1" d="M84 237T84 250T98 270H679Q694 262 694 250T679 230H98Q84 237 84 250Z"></path><path id="MJMATHI-64" stroke-width="1" d="M366 683Q367 683 438 688T511 694Q523 694 523 686Q523 679 450 384T375 83T374 68Q374 26 402 26Q411 27 422 35Q443 55 463 131Q469 151 473 152Q475 153 483 153H487H491Q506 153 506 145Q506 140 503 129Q490 79 473 48T445 8T417 -8Q409 -10 393 -10Q359 -10 336 5T306 36L300 51Q299 52 296 50Q294 48 292 46Q233 -10 172 -10Q117 -10 75 30T33 157Q33 205 53 255T101 341Q148 398 195 420T280 442Q336 442 364 400Q369 394 369 396Q370 400 396 505T424 616Q424 629 417 632T378 637H357Q351 643 351 645T353 664Q358 683 366 683ZM352 326Q329 405 277 405Q242 405 210 374T160 293Q131 214 119 129Q119 126 119 118T118 106Q118 61 136 44T179 26Q233 26 290 98L298 109L352 326Z"></path><path id="MJMATHI-61" stroke-width="1" d="M33 157Q33 258 109 349T280 441Q331 441 370 392Q386 422 416 422Q429 422 439 414T449 394Q449 381 412 234T374 68Q374 43 381 35T402 26Q411 27 422 35Q443 55 463 131Q469 151 473 152Q475 153 483 153H487Q506 
153 506 144Q506 138 501 117T481 63T449 13Q436 0 417 -8Q409 -10 393 -10Q359 -10 336 5T306 36L300 51Q299 52 296 50Q294 48 292 46Q233 -10 172 -10Q117 -10 75 30T33 157ZM351 328Q351 334 346 350T323 385T277 405Q242 405 210 374T160 293Q131 214 119 129Q119 126 119 118T118 106Q118 61 136 44T179 26Q217 26 254 59T298 110Q300 114 325 217T351 328Z"></path><path id="MJMATHI-67" stroke-width="1" d="M311 43Q296 30 267 15T206 0Q143 0 105 45T66 160Q66 265 143 353T314 442Q361 442 401 394L404 398Q406 401 409 404T418 412T431 419T447 422Q461 422 470 413T480 394Q480 379 423 152T363 -80Q345 -134 286 -169T151 -205Q10 -205 10 -137Q10 -111 28 -91T74 -71Q89 -71 102 -80T116 -111Q116 -121 114 -130T107 -144T99 -154T92 -162L90 -164H91Q101 -167 151 -167Q189 -167 211 -155Q234 -144 254 -122T282 -75Q288 -56 298 -13Q311 35 311 43ZM384 328L380 339Q377 350 375 354T369 368T359 382T346 393T328 402T306 405Q262 405 221 352Q191 313 171 233T151 117Q151 38 213 38Q269 38 323 108L331 118L384 328Z"></path><path id="MJMAIN-5B" stroke-width="1" d="M118 -250V750H255V710H158V-210H255V-250H118Z"></path><path id="MJMATHI-66" stroke-width="1" d="M118 -162Q120 -162 124 -164T135 -167T147 -168Q160 -168 171 -155T187 -126Q197 -99 221 27T267 267T289 382V385H242Q195 385 192 387Q188 390 188 397L195 425Q197 430 203 430T250 431Q298 431 298 432Q298 434 307 482T319 540Q356 705 465 705Q502 703 526 683T550 630Q550 594 529 578T487 561Q443 561 443 603Q443 622 454 636T478 657L487 662Q471 668 457 668Q445 668 434 658T419 630Q412 601 403 552T387 469T380 433Q380 431 435 431Q480 431 487 430T498 424Q499 420 496 407T491 391Q489 386 482 386T428 385H372L349 263Q301 15 282 -47Q255 -132 212 -173Q175 -205 139 -205Q107 -205 81 -186T55 -132Q55 -95 76 -78T118 -61Q162 -61 162 -103Q162 -122 151 -136T127 -157L118 -162Z"></path><path id="MJMAIN-2032" stroke-width="1" d="M79 43Q73 43 52 49T30 61Q30 68 85 293T146 528Q161 560 198 560Q218 560 240 545T262 501Q262 496 260 486Q259 479 173 263T84 45T79 43Z"></path><path id="MJMAINB-6E" stroke-width="1" d="M40 
442Q217 450 218 450H224V407L225 365Q233 378 245 391T289 422T362 448Q374 450 398 450Q428 450 448 447T491 434T529 402T551 346Q553 335 554 198V62H623V0H614Q596 3 489 3Q374 3 365 0H356V62H425V194V275Q425 348 416 373T371 399Q326 399 288 370T238 290Q236 281 235 171V62H304V0H295Q277 3 171 3Q64 3 46 0H37V62H106V210V303Q106 353 104 363T91 376Q77 380 50 380H37V442H40Z"></path><path id="MJMAINB-65" stroke-width="1" d="M32 225Q32 332 102 392T272 452H283Q382 452 436 401Q494 343 494 243Q494 226 486 222T440 217Q431 217 394 217T327 218H175V209Q175 177 179 154T196 107T236 69T306 50Q312 49 323 49Q376 49 410 85Q421 99 427 111T434 127T442 133T463 135H468Q494 135 494 117Q494 110 489 97T468 66T431 32T373 5T292 -6Q181 -6 107 55T32 225ZM383 276Q377 346 348 374T280 402Q253 402 230 390T195 357Q179 331 176 279V266H383V276Z"></path><path id="MJMAINB-74" stroke-width="1" d="M272 49Q320 49 320 136V145V177H382V143Q382 106 380 99Q374 62 349 36T285 -2L272 -5H247Q173 -5 134 27Q109 46 102 74T94 160Q94 171 94 199T95 245V382H21V433H25Q58 433 90 456Q121 479 140 523T162 621V635H224V444H363V382H224V239V207V149Q224 98 228 81T249 55Q261 49 272 49Z"></path><path id="MJMAIN-5D" stroke-width="1" d="M22 710V750H159V-250H22V-210H119V710H22Z"></path><path id="MJMATHI-57" stroke-width="1" d="M436 683Q450 683 486 682T553 680Q604 680 638 681T677 682Q695 682 695 674Q695 670 692 659Q687 641 683 639T661 637Q636 636 621 632T600 624T597 615Q597 603 613 377T629 138L631 141Q633 144 637 151T649 170T666 200T690 241T720 295T759 362Q863 546 877 572T892 604Q892 619 873 628T831 637Q817 637 817 647Q817 650 819 660Q823 676 825 679T839 682Q842 682 856 682T895 682T949 681Q1015 681 1034 683Q1048 683 1048 672Q1048 666 1045 655T1038 640T1028 637Q1006 637 988 631T958 617T939 600T927 584L923 578L754 282Q586 -14 585 -15Q579 -22 561 -22Q546 -22 542 -17Q539 -14 523 229T506 480L494 462Q472 425 366 239Q222 -13 220 -15T215 -19Q210 -22 197 -22Q178 -22 176 -15Q176 -12 154 304T131 622Q129 631 121 633T82 637H58Q51 644 51 648Q52 671 64 683H76Q118 
680 176 680Q301 680 313 683H323Q329 677 329 674T327 656Q322 641 318 637H297Q236 634 232 620Q262 160 266 136L501 550L499 587Q496 629 489 632Q483 636 447 637Q428 637 422 639T416 648Q416 650 418 660Q419 664 420 669T421 676T424 680T428 682T436 683Z"></path><path id="MJMAIN-32" stroke-width="1" d="M109 429Q82 429 66 447T50 491Q50 562 103 614T235 666Q326 666 387 610T449 465Q449 422 429 383T381 315T301 241Q265 210 201 149L142 93L218 92Q375 92 385 97Q392 99 409 186V189H449V186Q448 183 436 95T421 3V0H50V19V31Q50 38 56 46T86 81Q115 113 136 137Q145 147 170 174T204 211T233 244T261 278T284 308T305 340T320 369T333 401T340 431T343 464Q343 527 309 573T212 619Q179 619 154 602T119 569T109 550Q109 549 114 549Q132 549 151 535T170 489Q170 464 154 447T109 429Z"></path><path id="MJMAIN-2225" stroke-width="1" d="M133 736Q138 750 153 750Q164 750 170 739Q172 735 172 250T170 -239Q164 -250 152 -250Q144 -250 138 -244L137 -243Q133 -241 133 -179T132 250Q132 731 133 736ZM329 739Q334 750 346 750Q353 750 361 744L362 743Q366 741 366 679T367 250T367 -178T362 -243L361 -244Q355 -250 347 -250Q335 -250 329 -239Q327 -235 327 250T329 739Z"></path><path id="MJAMS-2A7D" stroke-width="1" d="M674 636Q682 636 688 630T694 615T687 601Q686 600 417 472L151 346L399 228Q687 92 691 87Q694 81 694 76Q694 58 676 56H670L382 192Q92 329 90 331Q83 336 83 348Q84 359 96 365Q104 369 382 500T665 634Q669 636 674 636ZM94 170Q102 172 104 172Q110 171 254 103T535 -30T678 -98Q694 -106 694 -118Q694 -136 676 -138H670L382 -2Q92 135 90 137Q83 142 83 154Q84 164 94 170Z"></path><path id="MJMAIN-33" stroke-width="1" d="M127 463Q100 463 85 480T69 524Q69 579 117 622T233 665Q268 665 277 664Q351 652 390 611T430 522Q430 470 396 421T302 350L299 348Q299 347 308 345T337 336T375 315Q457 262 457 175Q457 96 395 37T238 -22Q158 -22 100 21T42 130Q42 158 60 175T105 193Q133 193 151 175T169 130Q169 119 166 110T159 94T148 82T136 74T126 70T118 67L114 66Q165 21 238 21Q293 21 321 74Q338 107 338 175V195Q338 290 274 322Q259 328 213 329L171 330L168 332Q166 335 166 
348Q166 366 174 366Q202 366 232 371Q266 376 294 413T322 525V533Q322 590 287 612Q265 626 240 626Q208 626 181 615T143 592T132 580H135Q138 579 143 578T153 573T165 566T175 555T183 540T186 520Q186 498 172 481T127 463Z"></path><path id="MJMATHI-3B2" stroke-width="1" d="M29 -194Q23 -188 23 -186Q23 -183 102 134T186 465Q208 533 243 584T309 658Q365 705 429 705H431Q493 705 533 667T573 570Q573 465 469 396L482 383Q533 332 533 252Q533 139 448 65T257 -10Q227 -10 203 -2T165 17T143 40T131 59T126 65L62 -188Q60 -194 42 -194H29ZM353 431Q392 431 427 419L432 422Q436 426 439 429T449 439T461 453T472 471T484 495T493 524T501 560Q503 569 503 593Q503 611 502 616Q487 667 426 667Q384 667 347 643T286 582T247 514T224 455Q219 439 186 308T152 168Q151 163 151 147Q151 99 173 68Q204 26 260 26Q302 26 349 51T425 137Q441 171 449 214T457 279Q457 337 422 372Q380 358 347 358H337Q258 358 258 389Q258 396 261 403Q275 431 353 431Z"></path><path id="MJMATHI-77" stroke-width="1" d="M580 385Q580 406 599 424T641 443Q659 443 674 425T690 368Q690 339 671 253Q656 197 644 161T609 80T554 12T482 -11Q438 -11 404 5T355 48Q354 47 352 44Q311 -11 252 -11Q226 -11 202 -5T155 14T118 53T104 116Q104 170 138 262T173 379Q173 380 173 381Q173 390 173 393T169 400T158 404H154Q131 404 112 385T82 344T65 302T57 280Q55 278 41 278H27Q21 284 21 287Q21 293 29 315T52 366T96 418T161 441Q204 441 227 416T250 358Q250 340 217 250T184 111Q184 65 205 46T258 26Q301 26 334 87L339 96V119Q339 122 339 128T340 136T341 143T342 152T345 165T348 182T354 206T362 238T373 281Q402 395 406 404Q419 431 449 431Q468 431 475 421T483 402Q483 389 454 274T422 142Q420 131 420 107V100Q420 85 423 71T442 42T487 26Q558 26 600 148Q609 171 620 213T632 273Q632 306 619 325T593 357T580 385Z"></path><path id="MJMAIN-34" stroke-width="1" d="M462 0Q444 3 333 3Q217 3 199 0H190V46H221Q241 46 248 46T265 48T279 53T286 61Q287 63 287 115V165H28V211L179 442Q332 674 334 675Q336 677 355 677H373L379 671V211H471V165H379V114Q379 73 379 66T385 54Q393 47 442 46H471V0H462ZM293 211V545L74 212L183 
211H293Z"></path><path id="MJMAIN-2207" stroke-width="1" d="M46 676Q46 679 51 683H781Q786 679 786 676Q786 674 617 326T444 -26Q439 -33 416 -33T388 -26Q385 -22 216 326T46 676ZM697 596Q697 597 445 597T193 596Q195 591 319 336T445 80L697 596Z"></path><path id="MJMATHI-45" stroke-width="1" d="M492 213Q472 213 472 226Q472 230 477 250T482 285Q482 316 461 323T364 330H312Q311 328 277 192T243 52Q243 48 254 48T334 46Q428 46 458 48T518 61Q567 77 599 117T670 248Q680 270 683 272Q690 274 698 274Q718 274 718 261Q613 7 608 2Q605 0 322 0H133Q31 0 31 11Q31 13 34 25Q38 41 42 43T65 46Q92 46 125 49Q139 52 144 61Q146 66 215 342T285 622Q285 629 281 629Q273 632 228 634H197Q191 640 191 642T193 659Q197 676 203 680H757Q764 676 764 669Q764 664 751 557T737 447Q735 440 717 440H705Q698 445 698 453L701 476Q704 500 704 528Q704 558 697 578T678 609T643 625T596 632T532 634H485Q397 633 392 631Q388 629 386 622Q385 619 355 499T324 377Q347 376 372 376H398Q464 376 489 391T534 472Q538 488 540 490T557 493Q562 493 565 493T570 492T572 491T574 487T577 483L544 351Q511 218 508 216Q505 213 492 213Z"></path><path id="MJSZ2-2211" stroke-width="1" d="M60 948Q63 950 665 950H1267L1325 815Q1384 677 1388 669H1348L1341 683Q1320 724 1285 761Q1235 809 1174 838T1033 881T882 898T699 902H574H543H251L259 891Q722 258 724 252Q725 250 724 246Q721 243 460 -56L196 -356Q196 -357 407 -357Q459 -357 548 -357T676 -358Q812 -358 896 -353T1063 -332T1204 -283T1307 -196Q1328 -170 1348 -124H1388Q1388 -125 1381 -145T1356 -210T1325 -294L1267 -449L666 -450Q64 -450 61 -448Q55 -446 55 -439Q55 -437 57 -433L590 177Q590 178 557 222T452 366T322 544L56 909L55 924Q55 945 60 948Z"></path><path id="MJMAIN-35" stroke-width="1" d="M164 157Q164 133 148 117T109 101H102Q148 22 224 22Q294 22 326 82Q345 115 345 210Q345 313 318 349Q292 382 260 382H254Q176 382 136 314Q132 307 129 306T114 304Q97 304 95 310Q93 314 93 485V614Q93 664 98 664Q100 666 102 666Q103 666 123 658T178 642T253 634Q324 634 389 662Q397 666 402 666Q410 666 410 648V635Q328 538 205 538Q174 538 149 
544L139 546V374Q158 388 169 396T205 412T256 420Q337 420 393 355T449 201Q449 109 385 44T229 -22Q148 -22 99 32T50 154Q50 178 61 192T84 210T107 214Q132 214 148 197T164 157Z"></path><path id="MJMAIN-2B" stroke-width="1" d="M56 237T56 250T70 270H369V420L370 570Q380 583 389 583Q402 583 409 568V270H707Q722 262 722 250T707 230H409V-68Q401 -82 391 -82H389H387Q375 -82 369 -68V230H70Q56 237 56 250Z"></path><path id="MJMAIN-2E" stroke-width="1" d="M78 60Q78 84 95 102T138 120Q162 120 180 104T199 61Q199 36 182 18T139 0T96 17T78 60Z"></path><path id="MJMAINB-78" stroke-width="1" d="M227 0Q212 3 121 3Q40 3 28 0H21V62H117L245 213L109 382H26V444H34Q49 441 143 441Q247 441 265 444H274V382H246L281 339Q315 297 316 297Q320 297 354 341L389 382H352V444H360Q375 441 466 441Q547 441 559 444H566V382H471L355 246L504 63L545 62H586V0H578Q563 3 469 3Q365 3 347 0H338V62H366Q366 63 326 112T285 163L198 63L217 62H235V0H227Z"></path><path id="MJMAINB-68" stroke-width="1" d="M40 686L131 690Q222 694 223 694H229V533L230 372L238 381Q248 394 264 407T317 435T398 450Q428 450 448 447T491 434T529 402T551 346Q553 335 554 198V62H623V0H614Q596 3 489 3Q374 3 365 0H356V62H425V194V275Q425 348 416 373T371 399Q326 399 288 370T238 290Q236 281 235 171V62H304V0H295Q277 3 171 3Q64 3 46 0H37V62H106V332Q106 387 106 453T107 534Q107 593 105 605T91 620Q77 624 50 624H37V686H40Z"></path><path id="MJMAINB-63" stroke-width="1" d="M447 131H458Q478 131 478 117Q478 112 471 95T439 51T377 9Q330 -6 286 -6Q196 -6 135 35Q39 96 39 222Q39 324 101 384Q169 453 286 453Q359 453 411 431T464 353Q464 319 445 302T395 284Q360 284 343 305T325 353Q325 380 338 396H333Q317 398 295 398H292Q280 398 271 397T245 390T218 373T197 338T183 283Q182 275 182 231Q182 199 184 180T193 132T220 85T270 57Q289 50 317 50H326Q385 50 414 115Q419 127 423 129T447 131Z"></path><path id="MJMAINB-62" stroke-width="1" d="M32 686L123 690Q214 694 215 694H221V409Q289 450 378 450Q479 450 539 387T600 221Q600 122 535 58T358 -6H355Q272 -6 203 53L160 1L129 0H98V301Q98 362 98 435T99 525Q99 
591 97 604T83 620Q69 624 42 624H29V686H32ZM227 105L232 99Q237 93 242 87T258 73T280 59T306 49T339 45Q380 45 411 66T451 131Q457 160 457 230Q457 264 456 284T448 329T430 367T396 389T343 398Q282 398 235 355L227 348V105Z"></path><path id="MJMATHI-3C3" stroke-width="1" d="M184 -11Q116 -11 74 34T31 147Q31 247 104 333T274 430Q275 431 414 431H552Q553 430 555 429T559 427T562 425T565 422T567 420T569 416T570 412T571 407T572 401Q572 357 507 357Q500 357 490 357T476 358H416L421 348Q439 310 439 263Q439 153 359 71T184 -11ZM361 278Q361 358 276 358Q152 358 115 184Q114 180 114 178Q106 141 106 117Q106 67 131 47T188 26Q242 26 287 73Q316 103 334 153T356 233T361 278Z"></path><path id="MJMAINB-66" stroke-width="1" d="M308 0Q290 3 172 3Q58 3 49 0H40V62H109V382H42V444H109V503L110 562L112 572Q127 625 178 658T316 699Q318 699 330 699T348 700Q381 698 404 687T436 658T449 629T452 606Q452 576 432 557T383 537Q355 537 335 555T314 605Q314 635 328 649H325Q311 649 293 644T253 618T227 560Q226 555 226 498V444H340V382H232V62H318V0H308Z"></path><path id="MJMAIN-22C5" stroke-width="1" d="M78 250Q78 274 95 292T138 310Q162 310 180 294T199 251Q199 226 182 208T139 190T96 207T78 250Z"></path><path id="MJMAIN-2C" stroke-width="1" d="M78 35T78 60T94 103T137 121Q165 121 187 96T210 8Q210 -27 201 -60T180 -117T154 -158T130 -185T117 -194Q113 -194 104 -185T95 -172Q95 -168 106 -156T131 -126T157 -76T173 -3V9L172 8Q170 7 167 6T161 3T152 1T140 0Q113 0 96 17Z"></path><path id="MJMATHI-78" stroke-width="1" d="M52 289Q59 331 106 386T222 442Q257 442 286 424T329 379Q371 442 430 442Q467 442 494 420T522 361Q522 332 508 314T481 292T458 288Q439 288 427 299T415 328Q415 374 465 391Q454 404 425 404Q412 404 406 402Q368 386 350 336Q290 115 290 78Q290 50 306 38T341 26Q378 26 414 59T463 140Q466 150 469 151T485 153H489Q504 153 504 145Q504 144 502 134Q486 77 440 33T333 -11Q263 -11 227 52Q186 -10 133 -10H127Q78 -10 57 16T35 71Q35 103 54 123T99 143Q142 143 142 101Q142 81 130 66T107 46T94 41L91 40Q91 39 97 36T113 29T132 26Q168 26 194 71Q203 87 
217 139T245 247T261 313Q266 340 266 352Q266 380 251 392T217 404Q177 404 142 372T93 290Q91 281 88 280T72 278H58Q52 284 52 289Z"></path><path id="MJMATHI-68" stroke-width="1" d="M137 683Q138 683 209 688T282 694Q294 694 294 685Q294 674 258 534Q220 386 220 383Q220 381 227 388Q288 442 357 442Q411 442 444 415T478 336Q478 285 440 178T402 50Q403 36 407 31T422 26Q450 26 474 56T513 138Q516 149 519 151T535 153Q555 153 555 145Q555 144 551 130Q535 71 500 33Q466 -10 419 -10H414Q367 -10 346 17T325 74Q325 90 361 192T398 345Q398 404 354 404H349Q266 404 205 306L198 293L164 158Q132 28 127 16Q114 -11 83 -11Q69 -11 59 -2T48 16Q48 30 121 320L195 616Q195 629 188 632T149 637H128Q122 643 122 645T124 664Q129 683 137 683Z"></path><path id="MJMATHI-63" stroke-width="1" d="M34 159Q34 268 120 355T306 442Q362 442 394 418T427 355Q427 326 408 306T360 285Q341 285 330 295T319 325T330 359T352 380T366 386H367Q367 388 361 392T340 400T306 404Q276 404 249 390Q228 381 206 359Q162 315 142 235T121 119Q121 73 147 50Q169 26 205 26H209Q321 26 394 111Q403 121 406 121Q410 121 419 112T429 98T420 83T391 55T346 25T282 0T202 -11Q127 -11 81 37T34 159Z"></path><path id="MJMAIN-D7" stroke-width="1" d="M630 29Q630 9 609 9Q604 9 587 25T493 118L389 222L284 117Q178 13 175 11Q171 9 168 9Q160 9 154 15T147 29Q147 36 161 51T255 146L359 250L255 354Q174 435 161 449T147 471Q147 480 153 485T168 490Q173 490 175 489Q178 487 284 383L389 278L493 382Q570 459 587 475T609 491Q630 491 630 471Q630 464 620 453T522 355L418 250L522 145Q606 61 618 48T630 29Z"></path><path id="MJMAIN-36" stroke-width="1" d="M42 313Q42 476 123 571T303 666Q372 666 402 630T432 550Q432 525 418 510T379 495Q356 495 341 509T326 548Q326 592 373 601Q351 623 311 626Q240 626 194 566Q147 500 147 364L148 360Q153 366 156 373Q197 433 263 433H267Q313 433 348 414Q372 400 396 374T435 317Q456 268 456 210V192Q456 169 451 149Q440 90 387 34T253 -22Q225 -22 199 -14T143 16T92 75T56 172T42 313ZM257 397Q227 397 205 380T171 335T154 278T148 216Q148 133 160 97T198 39Q222 21 251 21Q302 21 
329 59Q342 77 347 104T352 209Q352 289 347 316T329 361Q302 397 257 397Z"></path><path id="MJSZ1-5B" stroke-width="1" d="M202 -349V850H394V810H242V-309H394V-349H202Z"></path><path id="MJSZ1-5D" stroke-width="1" d="M22 810V850H214V-349H22V-309H174V810H22Z"></path><path id="MJSZ3-5B" stroke-width="1" d="M247 -949V1450H516V1388H309V-887H516V-949H247Z"></path><path id="MJSZ3-5D" stroke-width="1" d="M11 1388V1450H280V-949H11V-887H218V1388H11Z"></path><path id="MJMAIN-37" stroke-width="1" d="M55 458Q56 460 72 567L88 674Q88 676 108 676H128V672Q128 662 143 655T195 646T364 644H485V605L417 512Q408 500 387 472T360 435T339 403T319 367T305 330T292 284T284 230T278 162T275 80Q275 66 275 52T274 28V19Q270 2 255 -10T221 -22Q210 -22 200 -19T179 0T168 40Q168 198 265 368Q285 400 349 489L395 552H302Q128 552 119 546Q113 543 108 522T98 479L95 458V455H55V458Z"></path><path id="MJMAINB-69" stroke-width="1" d="M72 610Q72 649 98 672T159 695Q193 693 217 670T241 610Q241 572 217 549T157 525Q120 525 96 548T72 610ZM46 442L136 446L226 450H232V62H294V0H286Q271 3 171 3Q67 3 49 0H40V62H109V209Q109 358 108 362Q103 380 55 380H43V442H46Z"></path><path id="MJMAINB-7E" stroke-width="1" d="M343 202Q320 202 278 225T215 249Q181 249 146 214L134 202L115 219Q111 222 106 226T98 234L96 236Q158 306 165 313Q199 344 230 344Q239 344 244 343Q262 339 300 318T359 297Q393 297 428 332L440 344L459 327Q463 324 468 320T476 312L478 310Q416 240 409 233Q375 202 343 202Z"></path><path id="MJMAIN-74" stroke-width="1" d="M27 422Q80 426 109 478T141 600V615H181V431H316V385H181V241Q182 116 182 100T189 68Q203 29 238 29Q282 29 292 100Q293 108 293 146V181H333V146V134Q333 57 291 17Q264 -10 221 -10Q187 -10 162 2T124 33T105 68T98 100Q97 107 97 248V385H18V422H27Z"></path><path id="MJMAIN-61" stroke-width="1" d="M137 305T115 305T78 320T63 359Q63 394 97 421T218 448Q291 448 336 416T396 340Q401 326 401 309T402 194V124Q402 76 407 58T428 40Q443 40 448 56T453 109V145H493V106Q492 66 490 59Q481 29 455 12T400 -6T353 12T329 54V58L327 55Q325 52 322 49T314 
40T302 29T287 17T269 6T247 -2T221 -8T190 -11Q130 -11 82 20T34 107Q34 128 41 147T68 188T116 225T194 253T304 268H318V290Q318 324 312 340Q290 411 215 411Q197 411 181 410T156 406T148 403Q170 388 170 359Q170 334 154 320ZM126 106Q126 75 150 51T209 26Q247 26 276 49T315 109Q317 116 318 175Q318 233 317 233Q309 233 296 232T251 223T193 203T147 166T126 106Z"></path><path id="MJMAIN-6E" stroke-width="1" d="M41 46H55Q94 46 102 60V68Q102 77 102 91T102 122T103 161T103 203Q103 234 103 269T102 328V351Q99 370 88 376T43 385H25V408Q25 431 27 431L37 432Q47 433 65 434T102 436Q119 437 138 438T167 441T178 442H181V402Q181 364 182 364T187 369T199 384T218 402T247 421T285 437Q305 442 336 442Q450 438 463 329Q464 322 464 190V104Q464 66 466 59T477 49Q498 46 526 46H542V0H534L510 1Q487 2 460 2T422 3Q319 3 310 0H302V46H318Q379 46 379 62Q380 64 380 200Q379 335 378 343Q372 371 358 385T334 402T308 404Q263 404 229 370Q202 343 195 315T187 232V168V108Q187 78 188 68T191 55T200 49Q221 46 249 46H265V0H257L234 1Q210 2 183 2T145 3Q42 3 33 0H25V46H41Z"></path><path id="MJMAIN-68" stroke-width="1" d="M41 46H55Q94 46 102 60V68Q102 77 102 91T102 124T102 167T103 217T103 272T103 329Q103 366 103 407T103 482T102 542T102 586T102 603Q99 622 88 628T43 637H25V660Q25 683 27 683L37 684Q47 685 66 686T103 688Q120 689 140 690T170 693T181 694H184V367Q244 442 328 442Q451 442 463 329Q464 322 464 190V104Q464 66 466 59T477 49Q498 46 526 46H542V0H534L510 1Q487 2 460 2T422 3Q319 3 310 0H302V46H318Q379 46 379 62Q380 64 380 200Q379 335 378 343Q372 371 358 385T334 402T308 404Q263 404 229 370Q202 343 195 315T187 232V168V108Q187 78 188 68T191 55T200 49Q221 46 249 46H265V0H257L234 1Q210 2 183 2T145 3Q42 3 33 0H25V46H41Z"></path><path id="MJMAIN-2218" stroke-width="1" d="M55 251Q55 328 112 386T249 444T386 388T444 249Q444 171 388 113T250 55Q170 55 113 112T55 251ZM245 403Q188 403 142 361T96 250Q96 183 141 140T250 96Q284 96 313 109T354 135T375 160Q403 197 403 250Q403 313 360 358T245 403Z"></path><path id="MJMAINB-6F" stroke-width="1" d="M287 
-5Q228 -5 182 10T109 48T63 102T39 161T32 219Q32 272 50 314T94 382T154 423T214 446T265 452H279Q319 452 326 451Q428 439 485 376T542 221Q542 156 514 108T442 33Q384 -5 287 -5ZM399 230V250Q399 280 398 298T391 338T372 372T338 392T282 401Q241 401 212 380Q190 363 183 334T175 230Q175 202 175 189T177 153T183 118T195 91T215 68T245 56T287 50Q348 50 374 84Q388 101 393 132T399 230Z"></path><path id="MJMATHI-6F" stroke-width="1" d="M201 -11Q126 -11 80 38T34 156Q34 221 64 279T146 380Q222 441 301 441Q333 441 341 440Q354 437 367 433T402 417T438 387T464 338T476 268Q476 161 390 75T201 -11ZM121 120Q121 70 147 48T206 26Q250 26 289 58T351 142Q360 163 374 216T388 308Q388 352 370 375Q346 405 306 405Q243 405 195 347Q158 303 140 230T121 120Z"></path><path id="MJMAIN-38" stroke-width="1" d="M70 417T70 494T124 618T248 666Q319 666 374 624T429 515Q429 485 418 459T392 417T361 389T335 371T324 363L338 354Q352 344 366 334T382 323Q457 264 457 174Q457 95 399 37T249 -22Q159 -22 101 29T43 155Q43 263 172 335L154 348Q133 361 127 368Q70 417 70 494ZM286 386L292 390Q298 394 301 396T311 403T323 413T334 425T345 438T355 454T364 471T369 491T371 513Q371 556 342 586T275 624Q268 625 242 625Q201 625 165 599T128 534Q128 511 141 492T167 463T217 431Q224 426 228 424L286 386ZM250 21Q308 21 350 55T392 137Q392 154 387 169T375 194T353 216T330 234T301 253T274 270Q260 279 244 289T218 306L210 311Q204 311 181 294T133 239T107 157Q107 98 150 60T250 21Z"></path><path id="MJMATHI-7A" stroke-width="1" d="M347 338Q337 338 294 349T231 360Q211 360 197 356T174 346T162 335T155 324L153 320Q150 317 138 317Q117 317 117 325Q117 330 120 339Q133 378 163 406T229 440Q241 442 246 442Q271 442 291 425T329 392T367 375Q389 375 411 408T434 441Q435 442 449 442H462Q468 436 468 434Q468 430 463 420T449 399T432 377T418 358L411 349Q368 298 275 214T160 106L148 94L163 93Q185 93 227 82T290 71Q328 71 360 90T402 140Q406 149 409 151T424 153Q443 153 443 143Q443 138 442 134Q425 72 376 31T278 -11Q252 -11 232 6T193 40T155 57Q111 57 76 -3Q70 -11 59 -11H54H41Q35 -5 35 
-2Q35 13 93 84Q132 129 225 214T340 322Q352 338 347 338Z"></path><path id="MJMATHI-79" stroke-width="1" d="M21 287Q21 301 36 335T84 406T158 442Q199 442 224 419T250 355Q248 336 247 334Q247 331 231 288T198 191T182 105Q182 62 196 45T238 27Q261 27 281 38T312 61T339 94Q339 95 344 114T358 173T377 247Q415 397 419 404Q432 431 462 431Q475 431 483 424T494 412T496 403Q496 390 447 193T391 -23Q363 -106 294 -155T156 -205Q111 -205 77 -183T43 -117Q43 -95 50 -80T69 -58T89 -48T106 -45Q150 -45 150 -87Q150 -107 138 -122T115 -142T102 -147L99 -148Q101 -153 118 -160T152 -167H160Q177 -167 186 -165Q219 -156 247 -127T290 -65T313 -9T321 21L315 17Q309 13 296 6T270 -6Q250 -11 231 -11Q185 -11 150 11T104 82Q103 89 103 113Q103 170 138 262T173 379Q173 380 173 381Q173 390 173 393T169 400T158 404H154Q131 404 112 385T82 344T65 302T57 280Q55 278 41 278H27Q21 284 21 287Z"></path><path id="MJMATHI-65" stroke-width="1" d="M39 168Q39 225 58 272T107 350T174 402T244 433T307 442H310Q355 442 388 420T421 355Q421 265 310 237Q261 224 176 223Q139 223 138 221Q138 219 132 186T125 128Q125 81 146 54T209 26T302 45T394 111Q403 121 406 121Q410 121 419 112T429 98T420 82T390 55T344 24T281 -1T205 -11Q126 -11 83 42T39 168ZM373 353Q367 405 305 405Q272 405 244 391T199 357T170 316T154 280T149 261Q149 260 169 260Q282 260 327 284T373 353Z"></path><path id="MJMAIN-39" stroke-width="1" d="M352 287Q304 211 232 211Q154 211 104 270T44 396Q42 412 42 436V444Q42 537 111 606Q171 666 243 666Q245 666 249 666T257 665H261Q273 665 286 663T323 651T370 619T413 560Q456 472 456 334Q456 194 396 97Q361 41 312 10T208 -22Q147 -22 108 7T68 93T121 149Q143 149 158 135T173 96Q173 78 164 65T148 49T135 44L131 43Q131 41 138 37T164 27T206 22H212Q272 22 313 86Q352 142 352 280V287ZM244 248Q292 248 321 297T351 430Q351 508 343 542Q341 552 337 562T323 588T293 615T246 625Q208 625 181 598Q160 576 154 546T147 441Q147 358 152 329T172 282Q197 248 244 248Z"></path><path id="MJMAIN-30" stroke-width="1" d="M96 585Q152 666 249 666Q297 666 345 640T423 548Q460 465 460 
320Q460 165 417 83Q397 41 362 16T301 -15T250 -22Q224 -22 198 -16T137 16T82 83Q39 165 39 320Q39 494 96 585ZM321 597Q291 629 250 629Q208 629 178 597Q153 571 145 525T137 333Q137 175 145 125T181 46Q209 16 250 16Q290 16 318 46Q347 76 354 130T362 333Q362 478 354 524T321 597Z"></path><path id="MJMAINB-61" stroke-width="1" d="M64 349Q64 399 107 426T255 453Q346 453 402 423T473 341Q478 327 478 310T479 196V77Q493 63 529 62Q549 62 553 57T558 31Q558 9 552 5T514 0H497H481Q375 0 367 56L356 46Q300 -6 210 -6Q130 -6 81 30T32 121Q32 188 111 226T332 272H350V292Q350 313 348 327T337 361T306 391T248 402T194 399H189Q204 376 204 354Q204 327 187 306T134 284Q97 284 81 305T64 349ZM164 121Q164 89 186 67T238 45Q274 45 307 63T346 108L350 117V226H347Q248 218 206 189T164 121Z"></path><path id="MJMATHI-6E" stroke-width="1" d="M21 287Q22 293 24 303T36 341T56 388T89 425T135 442Q171 442 195 424T225 390T231 369Q231 367 232 367L243 378Q304 442 382 442Q436 442 469 415T503 336T465 179T427 52Q427 26 444 26Q450 26 453 27Q482 32 505 65T540 145Q542 153 560 153Q580 153 580 145Q580 144 576 130Q568 101 554 73T508 17T439 -10Q392 -10 371 17T350 73Q350 92 386 193T423 345Q423 404 379 404H374Q288 404 229 303L222 291L189 157Q156 26 151 16Q138 -11 108 -11Q95 -11 87 -5T76 7T74 17Q74 30 112 180T152 343Q153 348 153 366Q153 405 129 405Q91 405 66 305Q60 285 60 284Q58 278 41 278H27Q21 284 21 287Z"></path></defs></svg></div><div id="MathJax_Message" style="display: none;"></div><div class="pace  pace-inactive"><div class="pace-progress" data-progress-text="100%" data-progress="99" style="width: 100%;">
  <div class="pace-progress-inner"></div>
</div>
<div class="pace-activity"></div></div>

    <div id="global-prompt-alert" class="hide alert alert-warning">
        <span id="global-prompt-message"></span>
        <a id="close-global-prompt-alert" href="https://zybuluo.com/hanbingtao/note/581764">[关闭]</a>
    </div>

    <!-- zybuluo's body -->
    







<!-- mdeditor's body -->






<div id="editor-reader-full" class="editor-reader-full-shown" style="position: static; padding-right: 75px;">
    <div id="reader-full-topInfo" class="reader-full-topInfo-shown">
        <span>
            <code>@hanbingtao</code>
        </span>
        <code><span class="article-updated-date">2017-08-28 19:55</span></code>
        <code><span>字数 </span><span class="article-characters">27396</span></code>
        <code><span>阅读 </span><span class="article-read">181367</span></code>
    </div>
    <div id="wmd-preview" class="wmd-preview wmd-preview-full-reader" data-medium-element="true"><div class="md-section-divider"></div><div class="md-section-divider"></div><h1 data-anchor-id="d0f1" id="零基础入门深度学习6-长短时记忆网络lstm">零基础入门深度学习(6) - 长短时记忆网络(LSTM)</h1><p data-anchor-id="p3ho"><code>机器学习</code> <code>深度学习入门</code></p><hr><p data-anchor-id="qc7u"><img src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/2256672-06627c71f0d8c0dc.jpg" alt=""></p><blockquote data-anchor-id="vfzn" class="white-blockquote">
  <p>无论即将到来的是大数据时代还是人工智能时代，亦或是传统行业使用人工智能在云上处理大数据的时代，作为一个有理想有追求的程序员，不懂深度学习（Deep Learning）这个超热的技术，会不会感觉马上就out了？现在救命稻草来了，《零基础入门深度学习》系列文章旨在帮助爱编程的你从零基础达到入门级水平。零基础意味着你不需要太多的数学知识，只要会写程序就行了，没错，这是专门为程序员写的文章。虽然文中会有很多公式你也许看不懂，但同时也会有更多的代码，程序员的你一定能看懂的（我周围是一群狂热的Clean Code程序员，所以我写的代码也不会很差）。</p>
</blockquote><div class="md-section-divider"></div><h2 data-anchor-id="56rq" id="文章列表">文章列表</h2><p data-anchor-id="yzpo"><a href="https://www.zybuluo.com/hanbingtao/note/433855" target="_blank">零基础入门深度学习(1) - 感知器</a> <br>
<a href="https://www.zybuluo.com/hanbingtao/note/448086" target="_blank">零基础入门深度学习(2) - 线性单元和梯度下降</a> <br>
<a href="https://www.zybuluo.com/hanbingtao/note/476663" target="_blank">零基础入门深度学习(3) - 神经网络和反向传播算法</a> <br>
<a href="https://www.zybuluo.com/hanbingtao/note/485480" target="_blank">零基础入门深度学习(4) - 卷积神经网络</a> <br>
<a href="https://zybuluo.com/hanbingtao/note/541458" target="_blank">零基础入门深度学习(5) - 循环神经网络</a> <br>
<a href="https://zybuluo.com/hanbingtao/note/581764" target="_blank">零基础入门深度学习(6) - 长短时记忆网络(LSTM)</a> <br>
<a href="https://zybuluo.com/hanbingtao/note/626300" target="_blank">零基础入门深度学习(7) - 递归神经网络</a></p><div class="md-section-divider"></div><h2 data-anchor-id="7ln8" id="往期回顾">往期回顾</h2><p data-anchor-id="lra8">在上一篇文章中，我们介绍了<strong>循环神经网络</strong>以及它的训练算法。我们也介绍了<strong>循环神经网络</strong>很难训练的原因，这导致了它在实际应用中，很难处理长距离的依赖。在本文中，我们将介绍一种改进之后的循环神经网络：<strong>长短时记忆网络(Long Short Term Memory Network, LSTM)</strong>，它成功的解决了原始循环神经网络的缺陷，成为当前最流行的RNN，在语音识别、图片描述、自然语言处理等许多领域中成功应用。但不幸的一面是，<strong>LSTM</strong>的结构很复杂，因此，我们需要花上一些力气，才能把LSTM以及它的训练算法弄明白。在搞清楚<strong>LSTM</strong>之后，我们再介绍一种<strong>LSTM</strong>的变体：<strong>GRU (Gated Recurrent Unit)</strong>。 它的结构比<strong>LSTM</strong>简单，而效果却和<strong>LSTM</strong>一样好，因此，它正在逐渐流行起来。最后，我们仍然会动手实现一个<strong>LSTM</strong>。</p><div class="md-section-divider"></div><h2 data-anchor-id="1th5" id="长短时记忆网络是啥">长短时记忆网络是啥</h2><p data-anchor-id="gqyw">我们首先了解一下长短时记忆网络产生的背景。回顾一下<a href="https://zybuluo.com/hanbingtao/note/541458" target="_blank">零基础入门深度学习(5) - 循环神经网络</a>中推导的，误差项沿时间反向传播的公式：</p><div class="md-section-divider"></div><p data-anchor-id="ed2x"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-1-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -1739.3154280431506 44270.673387096766 2978.6308560863013" style="width: 102.823ex; height: 6.895ex; vertical-align: -3.024ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><g transform="translate(16241,0)"><g transform="translate(-15,0)"><g transform="translate(0,-3)"><use href="#MJMATHI-3B4"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-54" x="641" y="488"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-6B" x="628" y="-463"></use><use href="#MJMAIN-3D" x="1329" y="0"></use></g></g><g transform="translate(2371,0)"><g 
transform="translate(0,-3)"><use href="#MJMATHI-3B4"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-54" x="641" y="488"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="628" y="-395"></use><g transform="translate(1218,0)"><use href="#MJSZ2-220F"></use><g transform="translate(57,-1110)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-3D" x="345" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-6B" x="1124" y="0"></use></g><g transform="translate(59,1151)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-31" x="1140" y="0"></use></g></g><use href="#MJMATHI-64" x="2663" y="0"></use><use href="#MJMATHI-69" x="3187" y="0"></use><use href="#MJMATHI-61" x="3532" y="0"></use><use href="#MJMATHI-67" x="4062" y="0"></use><use href="#MJMAIN-5B" x="4542" y="0"></use><g transform="translate(4821,0)"><use href="#MJMATHI-66"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2032" x="804" y="583"></use></g><use href="#MJMAIN-28" x="5684" y="0"></use><g transform="translate(6073,0)"><use href="#MJMAINB-6E"></use><use href="#MJMAINB-65" x="639" y="0"></use><use href="#MJMAINB-74" x="1167" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-69" x="2283" y="-213"></use></g><use href="#MJMAIN-29" x="8032" y="0"></use><use href="#MJMAIN-5D" x="8422" y="0"></use><use href="#MJMATHI-57" x="8700" y="0"></use></g></g></g><g transform="translate(42191,0)"><g transform="translate(0,-3)"><g id="mjx-eqn-1"><use href="#MJMAIN-28"></use><use href="#MJMAIN-31" x="389" y="0"></use><use href="#MJMAIN-29" x="890" y="0"></use></g></g></g></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-1">
\begin{align}
\delta_k^T=&\delta_t^T\prod_{i=k}^{t-1}diag[f'(\mathbf{net}_{i})]W\\
\end{align}
</script><p></p><p data-anchor-id="evng">我们可以根据下面的不等式，来获取<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-2-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -843.7687737936584 1051.8067273459228 1198.4501374605402" style="width: 2.419ex; height: 2.782ex; vertical-align: -0.968ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-3B4"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-54" x="641" y="488"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-6B" x="628" y="-463"></use></g></svg></span><script type="math/tex" id="MathJax-Element-2">\delta_k^T</script>的模的上界（模可以看做对<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-3-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -843.7687737936584 1051.8067273459228 1198.4501374605402" style="width: 2.419ex; height: 2.782ex; vertical-align: -0.968ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-3B4"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-54" x="641" y="488"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-6B" x="628" y="-463"></use></g></svg></span><script type="math/tex" id="MathJax-Element-3">\delta_k^T</script>中每一项值的大小的度量）：</p><div class="md-section-divider"></div><p data-anchor-id="p6kd"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-4-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -2491.700126726542 44270.673387096766 4483.400253453084" 
style="width: 102.823ex; height: 10.403ex; vertical-align: -4.718ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><g transform="translate(14239,0)"><g transform="translate(-15,0)"><g transform="translate(0,749)"><use href="#MJMAIN-2225"></use><g transform="translate(500,0)"><use href="#MJMATHI-3B4"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-54" x="641" y="488"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-6B" x="628" y="-463"></use></g><use href="#MJMAIN-2225" x="1552" y="0"></use><use href="#MJAMS-2A7D" x="2330" y="0"></use></g><use href="#MJAMS-2A7D" x="2330" y="-1673"></use></g><g transform="translate(3372,0)"><g transform="translate(0,749)"><use href="#MJMAIN-2225"></use><g transform="translate(500,0)"><use href="#MJMATHI-3B4"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-54" x="641" y="488"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="628" y="-395"></use></g><use href="#MJMAIN-2225" x="1552" y="0"></use><g transform="translate(2219,0)"><use href="#MJSZ2-220F"></use><g transform="translate(57,-1110)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-3D" x="345" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-6B" x="1124" y="0"></use></g><g transform="translate(59,1151)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-31" x="1140" y="0"></use></g></g><use href="#MJMAIN-2225" x="3664" y="0"></use><use href="#MJMATHI-64" x="4165" y="0"></use><use href="#MJMATHI-69" x="4688" y="0"></use><use href="#MJMATHI-61" x="5034" y="0"></use><use href="#MJMATHI-67" x="5563" y="0"></use><use href="#MJMAIN-5B" x="6044" y="0"></use><g transform="translate(6322,0)"><use 
href="#MJMATHI-66"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2032" x="804" y="583"></use></g><use href="#MJMAIN-28" x="7185" y="0"></use><g transform="translate(7575,0)"><use href="#MJMAINB-6E"></use><use href="#MJMAINB-65" x="639" y="0"></use><use href="#MJMAINB-74" x="1167" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-69" x="2283" y="-213"></use></g><use href="#MJMAIN-29" x="9534" y="0"></use><use href="#MJMAIN-5D" x="9923" y="0"></use><use href="#MJMAIN-2225" x="10202" y="0"></use><use href="#MJMAIN-2225" x="10702" y="0"></use><use href="#MJMATHI-57" x="11203" y="0"></use><use href="#MJMAIN-2225" x="12251" y="0"></use></g><g transform="translate(0,-1673)"><use href="#MJMAIN-2225"></use><g transform="translate(500,0)"><use href="#MJMATHI-3B4"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-54" x="641" y="488"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="628" y="-395"></use></g><use href="#MJMAIN-2225" x="1552" y="0"></use><use href="#MJMAIN-28" x="2052" y="0"></use><g transform="translate(2442,0)"><use href="#MJMATHI-3B2"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-66" x="801" y="-219"></use></g><g transform="translate(3498,0)"><use href="#MJMATHI-3B2"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-57" x="801" y="-213"></use></g><g transform="translate(4905,0)"><use href="#MJMAIN-29"></use><g transform="translate(389,412)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-6B" x="1140" y="0"></use></g></g></g></g></g><g transform="translate(42191,0)"><g transform="translate(0,749)"><g id="mjx-eqn-2"><use href="#MJMAIN-28"></use><use href="#MJMAIN-32" x="389" y="0"></use><use href="#MJMAIN-29" x="890" y="0"></use></g></g><g transform="translate(0,-1673)"><g id="mjx-eqn-3"><use 
href="#MJMAIN-28"></use><use href="#MJMAIN-33" x="389" y="0"></use><use href="#MJMAIN-29" x="890" y="0"></use></g></g></g></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-4">
\begin{align}
\|\delta_k^T\|\leqslant&\|\delta_t^T\|\prod_{i=k}^{t-1}\|diag[f'(\mathbf{net}_{i})]\|\|W\|\\
\leqslant&\|\delta_t^T\|(\beta_f\beta_W)^{t-k}
\end{align}
</script><p></p><p data-anchor-id="99yc">我们可以看到，误差项<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-5-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -737.2000693679926 451.5 767.4001387359851" style="width: 1.089ex; height: 1.815ex; vertical-align: -0.121ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-3B4"></use></g></svg></span><script type="math/tex" id="MathJax-Element-5">\delta</script>从t时刻传递到k时刻，其值的上界是<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-6-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -725.2000693679926 2228.904291763356 1044.8383231781165" style="width: 5.202ex; height: 2.419ex; vertical-align: -0.847ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-3B2"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-66" x="801" y="-219"></use><g transform="translate(1055,0)"><use href="#MJMATHI-3B2"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-77" x="801" y="-213"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-6">\beta_f\beta_w</script>的指数函数。<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-7-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -725.2000693679926 2228.904291763356 1044.8383231781165" style="width: 5.202ex; height: 2.419ex; vertical-align: -0.847ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-3B2"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-66" x="801" 
y="-219"></use><g transform="translate(1055,0)"><use href="#MJMATHI-3B2"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-77" x="801" y="-213"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-7">\beta_f\beta_w</script>分别是对角矩阵<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-8-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -778.7789736136457 6037.163311116846 1048.9790429816383" style="width: 14.032ex; height: 2.419ex; vertical-align: -0.726ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-64"></use><use href="#MJMATHI-69" x="523" y="0"></use><use href="#MJMATHI-61" x="869" y="0"></use><use href="#MJMATHI-67" x="1398" y="0"></use><use href="#MJMAIN-5B" x="1879" y="0"></use><g transform="translate(2157,0)"><use href="#MJMATHI-66"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2032" x="804" y="513"></use></g><use href="#MJMAIN-28" x="3020" y="0"></use><g transform="translate(3410,0)"><use href="#MJMAINB-6E"></use><use href="#MJMAINB-65" x="639" y="0"></use><use href="#MJMAINB-74" x="1167" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-69" x="2283" y="-213"></use></g><use href="#MJMAIN-29" x="5369" y="0"></use><use href="#MJMAIN-5D" x="5758" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-8">diag[f'(\mathbf{net}_{i})]</script>和矩阵W模的上界。显然，除非<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-9-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -725.2000693679926 2228.904291763356 1044.8383231781165" style="width: 5.202ex; height: 2.419ex; vertical-align: -0.847ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" 
transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-3B2"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-66" x="801" y="-219"></use><g transform="translate(1055,0)"><use href="#MJMATHI-3B2"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-77" x="801" y="-213"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-9">\beta_f\beta_w</script>乘积的值位于1附近，否则，当t-k很大时（也就是误差传递很多个时刻时），整个式子的值就会变得极小（当<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-10-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -725.2000693679926 2228.904291763356 1044.8383231781165" style="width: 5.202ex; height: 2.419ex; vertical-align: -0.847ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-3B2"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-66" x="801" y="-219"></use><g transform="translate(1055,0)"><use href="#MJMATHI-3B2"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-77" x="801" y="-213"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-10">\beta_f\beta_w</script>乘积小于1）或者极大（当<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-11-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -725.2000693679926 2228.904291763356 1044.8383231781165" style="width: 5.202ex; height: 2.419ex; vertical-align: -0.847ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-3B2"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-66" x="801" y="-219"></use><g transform="translate(1055,0)"><use href="#MJMATHI-3B2"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-77" x="801" 
y="-213"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-11">\beta_f\beta_w</script>乘积大于1），前者就是<strong>梯度消失</strong>，后者就是<strong>梯度爆炸</strong>。虽然科学家们搞出了很多技巧（比如怎样初始化权重），让<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-12-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -725.2000693679926 2228.904291763356 1044.8383231781165" style="width: 5.202ex; height: 2.419ex; vertical-align: -0.847ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-3B2"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-66" x="801" y="-219"></use><g transform="translate(1055,0)"><use href="#MJMATHI-3B2"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-77" x="801" y="-213"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-12">\beta_f\beta_w</script>的值尽可能贴近于1，终究还是难以抵挡指数函数的威力。</p><p data-anchor-id="dbub"><strong>梯度消失</strong>到底意味着什么？在<a href="https://zybuluo.com/hanbingtao/note/541458" target="_blank">零基础入门深度学习(5) - 循环神经网络</a>中我们已证明，权重数组W最终的梯度是各个时刻的梯度之和，即：</p><div class="md-section-divider"></div><p data-anchor-id="64o9"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-13-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -2376.17329241942 44270.673387096766 4252.34658483884" style="width: 102.823ex; height: 9.919ex; vertical-align: -4.476ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><g transform="translate(11409,0)"><g transform="translate(-15,0)"><g transform="translate(0,662)"><use href="#MJMAIN-2207"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-57" 
x="1178" y="-213"></use><use href="#MJMATHI-45" x="1674" y="0"></use></g></g><g transform="translate(2703,0)"><g transform="translate(0,662)"><use href="#MJMAIN-3D"></use><g transform="translate(1056,0)"><use href="#MJSZ2-2211"></use><g transform="translate(85,-1110)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-6B"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-3D" x="521" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-31" x="1300" y="0"></use></g><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="840" y="1627"></use></g><g transform="translate(2667,0)"><use href="#MJMAIN-2207"></use><g transform="translate(833,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-6B" x="1048" y="0"></use></g></g><use href="#MJMATHI-45" x="4711" y="0"></use></g><g transform="translate(0,-1606)"><use href="#MJMAIN-3D"></use><g transform="translate(1056,0)"><use href="#MJMAIN-2207"></use><g transform="translate(833,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="1048" y="0"></use></g></g><use href="#MJMATHI-45" x="2986" y="0"></use><use href="#MJMAIN-2B" x="3973" y="0"></use><g transform="translate(4974,0)"><use href="#MJMAIN-2207"></use><g transform="translate(833,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="1048" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212" x="1410" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-31" x="2188" y="0"></use></g></g><use href="#MJMATHI-45" x="7809" y="0"></use><use href="#MJMAIN-2B" x="8795" y="0"></use><g transform="translate(9796,0)"><use href="#MJMAIN-2207"></use><g transform="translate(833,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-57"></use><use 
transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="1048" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212" x="1410" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-32" x="2188" y="0"></use></g></g><use href="#MJMATHI-45" x="12631" y="0"></use><use href="#MJMAIN-2B" x="13396" y="0"></use><use href="#MJMAIN-2E" x="14174" y="0"></use><use href="#MJMAIN-2E" x="14619" y="0"></use><use href="#MJMAIN-2E" x="15064" y="0"></use><use href="#MJMAIN-2B" x="15510" y="0"></use><g transform="translate(16288,0)"><use href="#MJMAIN-2207"></use><g transform="translate(833,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-31" x="1048" y="0"></use></g></g><use href="#MJMATHI-45" x="18317" y="0"></use></g></g></g><g transform="translate(42191,0)"><g transform="translate(0,662)"><g id="mjx-eqn-4"><use href="#MJMAIN-28"></use><use href="#MJMAIN-34" x="389" y="0"></use><use href="#MJMAIN-29" x="890" y="0"></use></g></g><g transform="translate(0,-1606)"><g id="mjx-eqn-5"><use href="#MJMAIN-28"></use><use href="#MJMAIN-35" x="389" y="0"></use><use href="#MJMAIN-29" x="890" y="0"></use></g></g></g></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-13">
\begin{align}
\nabla_WE&=\sum_{k=1}^t\nabla_{Wk}E\\
&=\nabla_{Wt}E+\nabla_{Wt-1}E+\nabla_{Wt-2}E+...+\nabla_{W1}E
\end{align}
</script><p></p><p data-anchor-id="1klj">假设某轮训练中，各时刻的梯度以及最终的梯度之和如下图：</p><p data-anchor-id="apy6"><img src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/2256672-48784f6366412472.png" alt="" title=""></p><p data-anchor-id="ok9e">我们就可以看到，从上图的t-3时刻开始，梯度已经几乎减少到0了。那么，从这个时刻开始再往之前走，得到的梯度（几乎为零）就不会对最终的梯度值有任何贡献，这就相当于无论t-3时刻之前的网络状态h是什么，在训练中都不会对权重数组W的更新产生影响，也就是网络事实上已经忽略了t-3时刻之前的状态。这就是原始RNN无法处理长距离依赖的原因。</p><p data-anchor-id="v5oy">既然找到了问题的原因，那么我们就能解决它。从问题的定位到解决，科学家们大概花了7、8年时间。终于有一天，Hochreiter和Schmidhuber两位科学家发明出<strong>长短时记忆网络</strong>，一举解决这个问题。</p><p data-anchor-id="qpug">其实，<strong>长短时记忆网络</strong>的思路比较简单。原始RNN的隐藏层只有一个状态，即h，它对于短期的输入非常敏感。那么，假如我们再增加一个状态，即c，让它来保存长期的状态，那么问题不就解决了么？如下图所示：</p><p data-anchor-id="pgn0"><img src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/2256672-71de4194da5a5ec4.png" alt="" title=""></p><p data-anchor-id="r7ab">新增加的状态c，称为<strong>单元状态(cell state)</strong>。我们把上图按照时间维度展开：</p><p data-anchor-id="20aq"><img src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/2256672-715658c134b9d6f1.png" alt="" title=""></p><p data-anchor-id="iz04">上图仅仅是一个示意图，我们可以看出，在t时刻，LSTM的输入有三个：当前时刻网络的输入值<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-14-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.20006936799257 963.1191013989369 641.8854201102238" style="width: 2.177ex; height: 1.452ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-78"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="859" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-14">\mathbf{x}_t</script>、上一时刻LSTM的输出值<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-15-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: 
inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -714.2000693679926 1899.5086745365313 891.8854201102238" style="width: 4.355ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-68"></use><g transform="translate(639,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-31" x="1140" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-15">\mathbf{h}_{t-1}</script>、以及上一时刻的单元状态<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-16-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -473.20006936799257 1771.5086745365313 650.8854201102238" style="width: 4.113ex; height: 1.452ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-63"></use><g transform="translate(511,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-31" x="1140" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-16">\mathbf{c}_{t-1}</script>；LSTM的输出有两个：当前时刻LSTM输出值<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-17-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -714.2000693679926 995.1191013989369 891.8854201102238" style="width: 2.298ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" 
transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-68"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="904" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-17">\mathbf{h}_t</script>、和当前时刻的单元状态<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-18-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -473.20006936799257 867.1191013989369 650.8854201102238" style="width: 2.056ex; height: 1.452ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-63"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="723" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-18">\mathbf{c}_t</script>。注意<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-19-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.20006936799257 607.5 484.40013873598514" style="width: 1.452ex; height: 1.089ex; vertical-align: -0.121ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-78"></use></g></svg></span><script type="math/tex" id="MathJax-Element-19">\mathbf{x}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-20-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -714.2000693679926 639.5 734.4001387359851" style="width: 1.452ex; height: 1.694ex; vertical-align: -0.121ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-68"></use></g></svg></span><script type="math/tex" 
id="MathJax-Element-20">\mathbf{h}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-21-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -473.20006936799257 511.5 499.40013873598514" style="width: 1.21ex; height: 1.21ex; vertical-align: -0.121ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-63"></use></g></svg></span><script type="math/tex" id="MathJax-Element-21">\mathbf{c}</script>都是<strong>向量</strong>。</p><p data-anchor-id="xg08">LSTM的关键，就是怎样控制长期状态c。在这里，LSTM的思路是使用三个控制开关。第一个开关，负责控制继续保存长期状态c；第二个开关，负责控制把即时状态输入到长期状态c；第三个开关，负责控制是否把长期状态c作为当前的LSTM的输出。三个开关的作用如下图所示：</p><p data-anchor-id="fp5t"><img src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/2256672-bff9353b92b9c488.png" alt="" title=""></p><p data-anchor-id="6jgd">接下来，我们要描述一下，输出h和单元状态c的具体计算方法。</p><div class="md-section-divider"></div><h2 data-anchor-id="85h9" id="长短时记忆网络的前向计算">长短时记忆网络的前向计算</h2><p data-anchor-id="hypr">前面描述的开关是怎样在算法中实现的呢？这就用到了<strong>门（gate）</strong>的概念。门实际上就是一层<strong>全连接层</strong>，它的输入是一个向量，输出是一个0到1之间的实数向量。假设W是门的权重向量，<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-22-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -714.2000693679926 639.5 740.4001387359851" style="width: 1.452ex; height: 1.694ex; vertical-align: -0.121ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-62"></use></g></svg></span><script type="math/tex" id="MathJax-Element-22">\mathbf{b}</script>是偏置项，那么门可以表示为：</p><div class="md-section-divider"></div><p data-anchor-id="z337"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" 
style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-23-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -770.2000693679926 8071.000000000001 1040.4001387359851" style="width: 18.75ex; height: 2.419ex; vertical-align: -0.726ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-67"></use><use href="#MJMAIN-28" x="480" y="0"></use><use href="#MJMAINB-78" x="870" y="0"></use><use href="#MJMAIN-29" x="1477" y="0"></use><use href="#MJMAIN-3D" x="2144" y="0"></use><use href="#MJMATHI-3C3" x="3201" y="0"></use><use href="#MJMAIN-28" x="3773" y="0"></use><use href="#MJMATHI-57" x="4163" y="0"></use><use href="#MJMAINB-78" x="5211" y="0"></use><use href="#MJMAIN-2B" x="6041" y="0"></use><use href="#MJMAINB-62" x="7042" y="0"></use><use href="#MJMAIN-29" x="7681" y="0"></use></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-23">
g(\mathbf{x})=\sigma(W\mathbf{x}+\mathbf{b})
</script><p></p><p data-anchor-id="tnw9">门的使用，就是用门的输出向量按元素乘以我们需要控制的那个向量。因为门的输出是0到1之间的实数向量，那么，当门输出为0时，任何向量与之相乘都会得到0向量，这就相当于啥都不能通过；输出为1时，任何向量与之相乘都不会有任何改变，这就相当于啥都可以通过。因为<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-24-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -451.20006936799257 572.5 482.40013873598514" style="width: 1.331ex; height: 1.089ex; vertical-align: -0.121ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-3C3"></use></g></svg></span><script type="math/tex" id="MathJax-Element-24">\sigma</script>（也就是sigmoid函数）的值域是(0,1)，所以门的状态都是半开半闭的。</p><p data-anchor-id="scty">LSTM用两个门来控制单元状态c的内容，一个是<strong>遗忘门（forget gate）</strong>，它决定了上一时刻的单元状态<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-25-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -473.20006936799257 1771.5086745365313 650.8854201102238" style="width: 4.113ex; height: 1.452ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-63"></use><g transform="translate(511,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-31" x="1140" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-25">\mathbf{c}_{t-1}</script>有多少保留到当前时刻<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-26-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -473.20006936799257 867.1191013989369 
650.8854201102238" style="width: 2.056ex; height: 1.452ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-63"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="723" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-26">\mathbf{c}_t</script>；另一个是<strong>输入门（input gate）</strong>，它决定了当前时刻网络的输入<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-27-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.20006936799257 963.1191013989369 641.8854201102238" style="width: 2.177ex; height: 1.452ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-78"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="859" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-27">\mathbf{x}_t</script>有多少保存到单元状态<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-28-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -473.20006936799257 867.1191013989369 650.8854201102238" style="width: 2.056ex; height: 1.452ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-63"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="723" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-28">\mathbf{c}_t</script>。LSTM用<strong>输出门（output gate）</strong>来控制单元状态<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-29-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg 
xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -473.20006936799257 867.1191013989369 650.8854201102238" style="width: 2.056ex; height: 1.452ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-63"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="723" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-29">\mathbf{c}_t</script>有多少输出到LSTM的当前输出值<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-30-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -714.2000693679926 995.1191013989369 891.8854201102238" style="width: 2.298ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-68"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="904" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-30">\mathbf{h}_t</script>。</p><p data-anchor-id="pamy">我们先来看一下遗忘门：</p><div class="md-section-divider"></div><p data-anchor-id="f5ze"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-31-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -904.613537109928 16878.71287711255 1288.3956226069529" style="width: 39.194ex; height: 3.024ex; vertical-align: -0.968ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-66"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="497" y="-213"></use><use href="#MJMAIN-3D" x="984" y="0"></use><use href="#MJMATHI-3C3" x="2041" y="0"></use><use 
href="#MJMAIN-28" x="2613" y="0"></use><g transform="translate(3003,0)"><use href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-66" x="1335" y="-219"></use></g><use href="#MJMAIN-22C5" x="4659" y="0"></use><use href="#MJMAIN-5B" x="5159" y="0"></use><g transform="translate(5438,0)"><use href="#MJMAINB-68"></use><g transform="translate(639,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-31" x="1140" y="0"></use></g></g><use href="#MJMAIN-2C" x="7337" y="0"></use><g transform="translate(7783,0)"><use href="#MJMAINB-78"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="859" y="-213"></use></g><use href="#MJMAIN-5D" x="8746" y="0"></use><use href="#MJMAIN-2B" x="9246" y="0"></use><g transform="translate(10247,0)"><use href="#MJMAINB-62"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-66" x="904" y="-219"></use></g><use href="#MJMAIN-29" x="11376" y="0"></use><use href="#MJMAIN-28" x="14765" y="0"></use><g transform="translate(15155,0)"><text font-family="STIXGeneral,&#39;Arial Unicode MS&#39;,serif" font-style="" font-weight="" stroke="none" transform="scale(52.08314516129032) matrix(1 0 0 -1 0 0)">式</text></g><use href="#MJMAIN-31" x="15988" y="0"></use><use href="#MJMAIN-29" x="16489" y="0"></use></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-31">
\mathbf{f}_t=\sigma(W_f\cdot[\mathbf{h}_{t-1},\mathbf{x}_t]+\mathbf{b}_f)\qquad\quad(式1)
</script><p></p><p data-anchor-id="qx3b">上式中，<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-32-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1433.7622830431944 1022.8383231781165" style="width: 3.387ex; height: 2.419ex; vertical-align: -0.847ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-66" x="1335" y="-219"></use></g></svg></span><script type="math/tex" id="MathJax-Element-32">W_f</script>是遗忘门的权重矩阵，<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-33-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -770.2000693679926 3864.794442602135 1040.4001387359851" style="width: 8.952ex; height: 2.419ex; vertical-align: -0.726ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAIN-5B"></use><g transform="translate(278,0)"><use href="#MJMAINB-68"></use><g transform="translate(639,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-31" x="1140" y="0"></use></g></g><use href="#MJMAIN-2C" x="2178" y="0"></use><g transform="translate(2623,0)"><use href="#MJMAINB-78"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="859" y="-213"></use></g><use href="#MJMAIN-5D" x="3586" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-33">[\mathbf{h}_{t-1},\mathbf{x}_t]</script>表示把两个向量连接成一个更长的向量，<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-34-Frame" 
role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -714.2000693679926 1128.7622830431944 1033.8383231781165" style="width: 2.661ex; height: 2.419ex; vertical-align: -0.847ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-62"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-66" x="904" y="-219"></use></g></svg></span><script type="math/tex" id="MathJax-Element-34">\mathbf{b}_f</script>是遗忘门的偏置项，<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-35-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -451.20006936799257 572.5 482.40013873598514" style="width: 1.331ex; height: 1.089ex; vertical-align: -0.121ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-3C3"></use></g></svg></span><script type="math/tex" id="MathJax-Element-35">\sigma</script>是sigmoid函数。如果输入的维度是<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-36-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -714.2000693679926 1025.3186322292986 891.8854201102238" style="width: 2.419ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-64"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-78" x="736" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-36">d_x</script>，隐藏层的维度是<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-37-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg 
xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -714.2000693679926 1028.1470593540448 891.8854201102238" style="width: 2.419ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-64"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-68" x="736" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-37">d_h</script>，单元状态的维度是<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-38-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -714.2000693679926 927.0307896443684 892.5925268914102" style="width: 2.177ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-64"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-63" x="736" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-38">d_c</script>（通常<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-39-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -714.2000693679926 3289.2334045539687 892.5925268914102" style="width: 7.621ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-64"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-63" x="736" y="-213"></use><use href="#MJMAIN-3D" x="1204" y="0"></use><g transform="translate(2261,0)"><use href="#MJMATHI-64"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-68" x="736" y="-213"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-39">d_c=d_h</script>），则遗忘门的权重矩阵<span 
class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-40-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1433.7622830431944 1022.8383231781165" style="width: 3.387ex; height: 2.419ex; vertical-align: -0.847ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-66" x="1335" y="-219"></use></g></svg></span><script type="math/tex" id="MathJax-Element-40">W_f</script>维度是<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-41-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -770.2000693679926 6205.385370116601 1040.4001387359851" style="width: 14.395ex; height: 2.419ex; vertical-align: -0.726ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-64"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-63" x="736" y="-213"></use><use href="#MJMAIN-D7" x="1149" y="0"></use><use href="#MJMAIN-28" x="2149" y="0"></use><g transform="translate(2539,0)"><use href="#MJMATHI-64"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-68" x="736" y="-213"></use></g><use href="#MJMAIN-2B" x="3789" y="0"></use><g transform="translate(4790,0)"><use href="#MJMATHI-64"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-78" x="736" y="-213"></use></g><use href="#MJMAIN-29" x="5815" y="0"></use></g></svg></span><script type="math/tex" id="MathJax-Element-41"> d_c\times (d_h+d_x)</script>。事实上，权重矩阵<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-42-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg 
xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1433.7622830431944 1022.8383231781165" style="width: 3.387ex; height: 2.419ex; vertical-align: -0.847ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-66" x="1335" y="-219"></use></g></svg></span><script type="math/tex" id="MathJax-Element-42">W_f</script>都是两个矩阵拼接而成的：一个是<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-43-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1841.409342397239 1022.8383231781165" style="width: 4.234ex; height: 2.419ex; vertical-align: -0.847ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><g transform="translate(944,-155)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-66"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-68" x="550" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-43">W_{fh}</script>，它对应着输入项<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-44-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -714.2000693679926 1899.5086745365313 891.8854201102238" style="width: 4.355ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-68"></use><g transform="translate(639,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-31" x="1140" 
y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-44">\mathbf{h}_{t-1}</script>，其维度为<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-45-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -714.2000693679926 3178.122293442858 892.5925268914102" style="width: 7.379ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-64"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-63" x="736" y="-213"></use><use href="#MJMAIN-D7" x="1149" y="0"></use><g transform="translate(2149,0)"><use href="#MJMATHI-64"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-68" x="736" y="-213"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-45">d_c\times d_h</script>；一个是<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-46-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1838.5809152724928 1022.8383231781165" style="width: 4.234ex; height: 2.419ex; vertical-align: -0.847ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><g transform="translate(944,-155)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-66"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-78" x="550" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-46">W_{fx}</script>，它对应着输入项<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-47-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.20006936799257 
963.1191013989369 641.8854201102238" style="width: 2.177ex; height: 1.452ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-78"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="859" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-47">\mathbf{x}_t</script>，其维度为<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-48-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -714.2000693679926 3175.2938663181117 892.5925268914102" style="width: 7.379ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-64"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-63" x="736" y="-213"></use><use href="#MJMAIN-D7" x="1149" y="0"></use><g transform="translate(2149,0)"><use href="#MJMATHI-64"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-78" x="736" y="-213"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-48">d_c\times d_x</script>。<span class="MathJax_Preview"></span><span class="MathJax_SVG" id="MathJax-Element-49-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1433.7622830431944 1022.8383231781165" style="width: 3.387ex; height: 2.419ex; vertical-align: -0.847ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-66" x="1335" y="-219"></use></g></svg></span><script type="math/tex" id="MathJax-Element-49">W_f</script>可以写为：</p><div class="md-section-divider"></div><p data-anchor-id="st68"><span 
class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-50-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -2145.419161589058 44270.673387096766 3790.8383231781163" style="width: 102.823ex; height: 8.831ex; vertical-align: -3.871ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><g transform="translate(14154,0)"><g transform="translate(-15,0)"><g transform="translate(0,675)"><use href="#MJSZ1-5B"></use><g transform="translate(584,0)"><g transform="translate(-15,0)"><use href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-66" x="1335" y="-219"></use></g></g><use href="#MJSZ1-5D" x="2170" y="-1"></use><g transform="translate(2588,0)"><use href="#MJSZ3-5B"></use><g transform="translate(695,0)"><g transform="translate(-15,0)"><g transform="translate(0,550)"><use href="#MJMAINB-68"></use><g transform="translate(639,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-31" x="1140" y="0"></use></g></g><g transform="translate(468,-650)"><use href="#MJMAINB-78"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="859" y="-213"></use></g></g></g><use href="#MJSZ3-5D" x="2747" y="-1"></use></g></g></g><g transform="translate(6128,0)"><g transform="translate(0,675)"><use href="#MJMAIN-3D"></use><g transform="translate(1056,0)"><use href="#MJSZ1-5B"></use><g transform="translate(584,0)"><g transform="translate(-15,0)"><use href="#MJMATHI-57"></use><g transform="translate(944,-155)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-66"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-68" x="550" 
y="0"></use></g></g><g transform="translate(2827,0)"><use href="#MJMATHI-57"></use><g transform="translate(944,-155)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-66"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-78" x="550" y="0"></use></g></g></g><use href="#MJSZ1-5D" x="5417" y="-1"></use></g><g transform="translate(6890,0)"><use href="#MJSZ3-5B"></use><g transform="translate(695,0)"><g transform="translate(-15,0)"><g transform="translate(0,550)"><use href="#MJMAINB-68"></use><g transform="translate(639,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-31" x="1140" y="0"></use></g></g><g transform="translate(468,-650)"><use href="#MJMAINB-78"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="859" y="-213"></use></g></g></g><use href="#MJSZ3-5D" x="2747" y="-1"></use></g></g><g transform="translate(0,-1326)"><use href="#MJMAIN-3D"></use><g transform="translate(1056,0)"><use href="#MJMATHI-57"></use><g transform="translate(944,-155)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-66"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-68" x="550" y="0"></use></g></g><g transform="translate(2897,0)"><use href="#MJMAINB-68"></use><g transform="translate(639,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-31" x="1140" y="0"></use></g></g><use href="#MJMAIN-2B" x="5019" y="0"></use><g transform="translate(6020,0)"><use href="#MJMATHI-57"></use><g transform="translate(944,-155)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-66"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-78" x="550" y="0"></use></g></g><g 
transform="translate(7858,0)"><use href="#MJMAINB-78"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="859" y="-213"></use></g></g></g></g><g transform="translate(42191,0)"><g transform="translate(0,675)"><g id="mjx-eqn-6"><use href="#MJMAIN-28"></use><use href="#MJMAIN-36" x="389" y="0"></use><use href="#MJMAIN-29" x="890" y="0"></use></g></g><g transform="translate(0,-1326)"><g id="mjx-eqn-7"><use href="#MJMAIN-28"></use><use href="#MJMAIN-37" x="389" y="0"></use><use href="#MJMAIN-29" x="890" y="0"></use></g></g></g></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-50">
\begin{align}
\begin{bmatrix}W_f\end{bmatrix}\begin{bmatrix}\mathbf{h}_{t-1}\\
\mathbf{x}_t\end{bmatrix}&=
\begin{bmatrix}W_{fh}&W_{fx}\end{bmatrix}\begin{bmatrix}\mathbf{h}_{t-1}\\
\mathbf{x}_t\end{bmatrix}\\
&=W_{fh}\mathbf{h}_{t-1}+W_{fx}\mathbf{x}_t
\end{align}
</script><p></p><p data-anchor-id="bstm">下图显示了遗忘门的计算：</p><p data-anchor-id="5rac"><img src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/2256672-c7f7ca0aa64b562f.png" alt="" title=""></p><p data-anchor-id="no86">接下来看看输入门：</p><div class="md-section-divider"></div><p data-anchor-id="calh"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processed" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-51-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -904.613537109928 16556.799096826067 1288.3956226069529" style="width: 38.468ex; height: 3.024ex; vertical-align: -0.968ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-69"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="451" y="-213"></use><use href="#MJMAIN-3D" x="952" y="0"></use><use href="#MJMATHI-3C3" x="2009" y="0"></use><use href="#MJMAIN-28" x="2581" y="0"></use><g transform="translate(2971,0)"><use href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-69" x="1335" y="-213"></use></g><use href="#MJMAIN-22C5" x="4482" y="0"></use><use href="#MJMAIN-5B" x="4982" y="0"></use><g transform="translate(5261,0)"><use href="#MJMAINB-68"></use><g transform="translate(639,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-31" x="1140" y="0"></use></g></g><use href="#MJMAIN-2C" x="7160" y="0"></use><g transform="translate(7606,0)"><use href="#MJMAINB-78"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="859" y="-213"></use></g><use href="#MJMAIN-5D" x="8569" y="0"></use><use href="#MJMAIN-2B" x="9069" y="0"></use><g 
transform="translate(10070,0)"><use href="#MJMAINB-62"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-69" x="904" y="-213"></use></g><use href="#MJMAIN-29" x="11054" y="0"></use><use href="#MJMAIN-28" x="14443" y="0"></use><g transform="translate(14833,0)"><text font-family="STIXGeneral,&#39;Arial Unicode MS&#39;,serif" font-style="" font-weight="" stroke="none" transform="scale(52.08314516129032) matrix(1 0 0 -1 0 0)">式</text></g><use href="#MJMAIN-32" x="15666" y="0"></use><use href="#MJMAIN-29" x="16167" y="0"></use></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-51">
\mathbf{i}_t=\sigma(W_i\cdot[\mathbf{h}_{t-1},\mathbf{x}_t]+\mathbf{b}_i)\qquad\quad(式2)
</script><p></p><p data-anchor-id="9mu5">上式中，<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-52-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1288.805392899952 880.8854201102238" style="width: 3.024ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-69" x="1335" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-52">W_i</script>是输入门的权重矩阵，<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-53-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -714.2000693679926 983.8053928999522 891.8854201102238" style="width: 2.298ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-62"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-69" x="904" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-53">\mathbf{b}_i</script>是输入门的偏置项。下图表示了输入门的计算：</p><p data-anchor-id="rnei"><img src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/2256672-89529fa23d9c8a7d.png" alt="" title=""></p><p data-anchor-id="43t9">接下来，我们计算用于描述当前输入的单元状态<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-54-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -695.324787109928 931.1191013989369 873.0101378521592" style="width: 2.177ex; height: 2.056ex; vertical-align: 
-0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-63" x="32" y="0"></use><use href="#MJMAINB-7E" x="0" y="331"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="813" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-54">\mathbf{\tilde{c}}_t</script>，它是根据上一次的输出和本次输入来计算的：</p><div class="md-section-divider"></div><p data-anchor-id="0dxn"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processed" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-55-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -904.613537109928 18367.7498903149 1288.3956226069529" style="width: 42.702ex; height: 3.024ex; vertical-align: -0.968ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-63" x="32" y="0"></use><use href="#MJMAINB-7E" x="0" y="331"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="813" y="-213"></use><use href="#MJMAIN-3D" x="1208" y="0"></use><g transform="translate(2265,0)"><use href="#MJMAIN-74"></use><use href="#MJMAIN-61" x="389" y="0"></use><use href="#MJMAIN-6E" x="890" y="0"></use><use href="#MJMAIN-68" x="1446" y="0"></use></g><use href="#MJMAIN-28" x="4268" y="0"></use><g transform="translate(4657,0)"><use href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-63" x="1335" y="-213"></use></g><use href="#MJMAIN-22C5" x="6230" y="0"></use><use href="#MJMAIN-5B" x="6731" y="0"></use><g transform="translate(7010,0)"><use href="#MJMAINB-68"></use><g transform="translate(639,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" 
href="#MJMAIN-31" x="1140" y="0"></use></g></g><use href="#MJMAIN-2C" x="8909" y="0"></use><g transform="translate(9354,0)"><use href="#MJMAINB-78"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="859" y="-213"></use></g><use href="#MJMAIN-5D" x="10317" y="0"></use><use href="#MJMAIN-2B" x="10818" y="0"></use><g transform="translate(11819,0)"><use href="#MJMAINB-62"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-63" x="904" y="-213"></use></g><use href="#MJMAIN-29" x="12865" y="0"></use><use href="#MJMAIN-28" x="16254" y="0"></use><g transform="translate(16644,0)"><text font-family="STIXGeneral,&#39;Arial Unicode MS&#39;,serif" font-style="" font-weight="" stroke="none" transform="scale(52.08314516129032) matrix(1 0 0 -1 0 0)">式</text></g><use href="#MJMAIN-33" x="17477" y="0"></use><use href="#MJMAIN-29" x="17978" y="0"></use></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-55">
\mathbf{\tilde{c}}_t=\tanh(W_c\cdot[\mathbf{h}_{t-1},\mathbf{x}_t]+\mathbf{b}_c)\qquad\quad(式3)
</script><p></p><p data-anchor-id="4clw">下图是<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-56-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -695.324787109928 931.1191013989369 873.0101378521592" style="width: 2.177ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-63" x="32" y="0"></use><use href="#MJMAINB-7E" x="0" y="331"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="813" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-56">\mathbf{\tilde{c}}_t</script>的计算：</p><p data-anchor-id="77fh"><img src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/2256672-73a0246cafc1d10d.png" alt="" title=""></p><p data-anchor-id="7kds">现在，我们计算当前时刻的单元状态<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-57-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -473.20006936799257 867.1191013989369 650.8854201102238" style="width: 2.056ex; height: 1.452ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-63"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="723" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-57">\mathbf{c}_t</script>。它是由上一次的单元状态<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-58-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -473.20006936799257 1771.5086745365313 650.8854201102238" 
style="width: 4.113ex; height: 1.452ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-63"></use><g transform="translate(511,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-31" x="1140" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-58">\mathbf{c}_{t-1}</script>按元素乘以遗忘门<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-59-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -725.2000693679926 846.1191013989369 950.4001387359851" style="width: 1.935ex; height: 2.177ex; vertical-align: -0.605ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-66"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="693" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-59">f_t</script>，再用当前输入的单元状态<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-60-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -695.324787109928 931.1191013989369 873.0101378521592" style="width: 2.177ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-63" x="32" y="0"></use><use href="#MJMAINB-7E" x="0" y="331"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="813" y="-213"></use></g></svg></span><script type="math/tex" 
id="MathJax-Element-60">\mathbf{\tilde{c}}_t</script>按元素乘以输入门<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-61-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -681.2000693679926 701.1191013989369 858.8854201102238" style="width: 1.573ex; height: 1.935ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="488" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-61">i_t</script>，再将两个积加和产生的：</p><div class="md-section-divider"></div><p data-anchor-id="4i0n"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processed" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-62-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -904.613537109928 14676.704291601814 1288.3956226069529" style="width: 34.113ex; height: 3.024ex; vertical-align: -0.968ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-63"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="723" y="-213"></use><use href="#MJMAIN-3D" x="1144" y="0"></use><g transform="translate(2201,0)"><use href="#MJMATHI-66"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="693" y="-213"></use></g><use href="#MJMAIN-2218" x="3269" y="0"></use><g transform="translate(3992,0)"><use href="#MJMAINB-63"></use><g transform="translate(511,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212" x="361" y="0"></use><use 
transform="scale(0.7071067811865476)" href="#MJMAIN-31" x="1140" y="0"></use></g></g><use href="#MJMAIN-2B" x="5985" y="0"></use><g transform="translate(6986,0)"><use href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="488" y="-213"></use></g><use href="#MJMAIN-2218" x="7910" y="0"></use><g transform="translate(8632,0)"><use href="#MJMAINB-63" x="32" y="0"></use><use href="#MJMAINB-7E" x="0" y="331"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="813" y="-213"></use></g><use href="#MJMAIN-28" x="12563" y="0"></use><g transform="translate(12953,0)"><text font-family="STIXGeneral,&#39;Arial Unicode MS&#39;,serif" font-style="" font-weight="" stroke="none" transform="scale(52.08314516129032) matrix(1 0 0 -1 0 0)">式</text></g><use href="#MJMAIN-34" x="13786" y="0"></use><use href="#MJMAIN-29" x="14287" y="0"></use></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-62">
\mathbf{c}_t=f_t\circ{\mathbf{c}_{t-1}}+i_t\circ{\mathbf{\tilde{c}}_t}\qquad\quad(式4)
</script><p></p><p data-anchor-id="uka5">符号<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-63-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><span style="display: inline-block; white-space: nowrap; padding: 1px 0px;"><span style="display: inline-block; position: relative; width: 1.21ex; height: 0.968ex; vertical-align: 0.121ex;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.20006936799257 500.5 483.40013873598514" style="width: 1.21ex; height: 1.089ex; position: absolute; bottom: -0.121ex; left: 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAIN-2218"></use></g></svg></span></span></span><script type="math/tex" id="MathJax-Element-63">\circ</script>表示<strong>按元素乘</strong>。下图是<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-64-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -473.20006936799257 867.1191013989369 650.8854201102238" style="width: 2.056ex; height: 1.452ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-63"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="723" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-64">\mathbf{c}_t</script>的计算：</p><p data-anchor-id="lmxh"><img src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/2256672-5c766f3d734334b1.png" alt="" title=""></p><p data-anchor-id="797c">这样，我们就把LSTM关于当前的记忆<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-65-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 
-695.324787109928 931.1191013989369 873.0101378521592" style="width: 2.177ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-63" x="32" y="0"></use><use href="#MJMAINB-7E" x="0" y="331"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="813" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-65">\mathbf{\tilde{c}}_t</script>和长期的记忆<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-66-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -473.20006936799257 1771.5086745365313 650.8854201102238" style="width: 4.113ex; height: 1.452ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-63"></use><g transform="translate(511,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-31" x="1140" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-66">\mathbf{c}_{t-1}</script>组合在一起，形成了新的单元状态<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-67-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -473.20006936799257 867.1191013989369 650.8854201102238" style="width: 2.056ex; height: 1.452ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-63"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="723" y="-213"></use></g></svg></span><script 
type="math/tex" id="MathJax-Element-67">\mathbf{c}_t</script>。由于遗忘门的控制，它可以保存很久很久之前的信息，由于输入门的控制，它又可以避免当前无关紧要的内容进入记忆。下面，我们要看看输出门，它控制了长期记忆对当前输出的影响：</p><div class="md-section-divider"></div><p data-anchor-id="f16j"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processed" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-68-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -904.613537109928 17010.7889955583 1288.3956226069529" style="width: 39.556ex; height: 3.024ex; vertical-align: -0.968ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-6F"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="813" y="-213"></use><use href="#MJMAIN-3D" x="1208" y="0"></use><use href="#MJMATHI-3C3" x="2265" y="0"></use><use href="#MJMAIN-28" x="2837" y="0"></use><g transform="translate(3227,0)"><use href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-6F" x="1335" y="-213"></use></g><use href="#MJMAIN-22C5" x="4837" y="0"></use><use href="#MJMAIN-5B" x="5337" y="0"></use><g transform="translate(5616,0)"><use href="#MJMAINB-68"></use><g transform="translate(639,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-74"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212" x="361" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-31" x="1140" y="0"></use></g></g><use href="#MJMAIN-2C" x="7515" y="0"></use><g transform="translate(7961,0)"><use href="#MJMAINB-78"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="859" y="-213"></use></g><use href="#MJMAIN-5D" x="8924" y="0"></use><use href="#MJMAIN-2B" x="9424" y="0"></use><g transform="translate(10425,0)"><use href="#MJMAINB-62"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-6F" 
x="904" y="-213"></use></g><use href="#MJMAIN-29" x="11508" y="0"></use><use href="#MJMAIN-28" x="14897" y="0"></use><g transform="translate(15287,0)"><text font-family="STIXGeneral,&#39;Arial Unicode MS&#39;,serif" font-style="" font-weight="" stroke="none" transform="scale(52.08314516129032) matrix(1 0 0 -1 0 0)">式</text></g><use href="#MJMAIN-35" x="16120" y="0"></use><use href="#MJMAIN-29" x="16621" y="0"></use></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-68">
\mathbf{o}_t=\sigma(W_o\cdot[\mathbf{h}_{t-1},\mathbf{x}_t]+\mathbf{b}_o)\qquad\quad(式5)
</script><p></p><p data-anchor-id="57y5">下图表示输出门的计算：</p><p data-anchor-id="jpod"><img src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/2256672-fd4d91d1b68b3759.png" alt="" title=""></p><p data-anchor-id="mngi">LSTM最终的输出，是由输出门和单元状态共同确定的：</p><div class="md-section-divider"></div><p data-anchor-id="6seo"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processed" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-69-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -904.613537109928 12967.187626777455 1288.3956226069529" style="width: 30.121ex; height: 3.024ex; vertical-align: -0.968ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-68"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="904" y="-213"></use><use href="#MJMAIN-3D" x="1272" y="0"></use><g transform="translate(2329,0)"><use href="#MJMAINB-6F"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="813" y="-213"></use></g><use href="#MJMAIN-2218" x="3482" y="0"></use><g transform="translate(4205,0)"><use href="#MJMAIN-74"></use><use href="#MJMAIN-61" x="389" y="0"></use><use href="#MJMAIN-6E" x="890" y="0"></use><use href="#MJMAIN-68" x="1446" y="0"></use></g><use href="#MJMAIN-28" x="6208" y="0"></use><g transform="translate(6597,0)"><use href="#MJMAINB-63"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="723" y="-213"></use></g><use href="#MJMAIN-29" x="7464" y="0"></use><use href="#MJMAIN-28" x="10854" y="0"></use><g transform="translate(11243,0)"><text font-family="STIXGeneral,&#39;Arial Unicode MS&#39;,serif" font-style="" font-weight="" stroke="none" transform="scale(52.08314516129032) matrix(1 0 0 -1 0 0)">式</text></g><use href="#MJMAIN-36" x="12077" y="0"></use><use href="#MJMAIN-29" x="12577" 
y="0"></use></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-69">
\mathbf{h}_t=\mathbf{o}_t\circ \tanh(\mathbf{c}_t)\qquad\quad(式6)
</script><p></p><p data-anchor-id="4k07">下图表示LSTM最终输出的计算：</p><p data-anchor-id="v6dx"><img src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/2256672-7ea82e4f1ac6cd75.png" alt="" title=""></p><p data-anchor-id="2oqh"><strong>式1</strong>到<strong>式6</strong>就是LSTM前向计算的全部公式。至此，我们就把LSTM前向计算讲完了。</p><div class="md-section-divider"></div><h2 data-anchor-id="3gp6" id="长短时记忆网络的训练">长短时记忆网络的训练</h2><p data-anchor-id="viwl">熟悉我们这个系列文章的同学都清楚，训练部分往往比前向计算部分复杂多了。LSTM的前向计算都这么复杂，那么，可想而知，它的训练算法一定是非常非常复杂的。现在只好做几次深呼吸，再一头扎进公式海洋吧。</p><div class="md-section-divider"></div><h3 data-anchor-id="ddby" id="lstm训练算法框架">LSTM训练算法框架</h3><p data-anchor-id="kw2a">LSTM的训练算法仍然是反向传播算法，对于这个算法，我们已经非常熟悉了。主要有下面三个步骤：</p><ol data-anchor-id="8vxz">
<li>前向计算每个神经元的输出值，对于LSTM来说，即<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-70-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -720.2000693679926 707.1191013989369 897.8854201102238" style="width: 1.694ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-66"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="497" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-70">\mathbf{f}_t</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-71-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -715.2000693679926 675.1191013989369 892.8854201102238" style="width: 1.573ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-69"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="451" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-71">\mathbf{i}_t</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-72-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -473.20006936799257 867.1191013989369 650.8854201102238" style="width: 2.056ex; height: 1.452ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-63"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="723" 
y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-72">\mathbf{c}_t</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-73-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -473.20006936799257 931.1191013989369 650.8854201102238" style="width: 2.177ex; height: 1.452ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-6F"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="813" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-73">\mathbf{o}_t</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-74-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -714.2000693679926 995.1191013989369 891.8854201102238" style="width: 2.298ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-68"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-74" x="904" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-74">\mathbf{h}_t</script>五个向量的值。计算方法已经在上一节中描述过了。</li>
<li>反向计算每个神经元的<strong>误差项</strong><span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-75-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -737.2000693679926 451.5 767.4001387359851" style="width: 1.089ex; height: 1.815ex; vertical-align: -0.121ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-3B4"></use></g></svg></span><script type="math/tex" id="MathJax-Element-75">\delta</script>值。与<strong>循环神经网络</strong>一样，LSTM误差项的反向传播也包括两个方向：一个是沿时间的反向传播，即从当前t时刻开始，计算每个时刻的误差项；一个是将误差项向上一层传播。</li>
<li>根据相应的误差项，计算每个权重的梯度。</li>
</ol><div class="md-section-divider"></div><h3 data-anchor-id="5qro" id="关于公式和符号的说明">关于公式和符号的说明</h3><p data-anchor-id="sje7">首先，我们对推导中用到的一些公式、符号做一下必要的说明。</p><p data-anchor-id="1aqy">接下来的推导中，我们设定gate的激活函数为sigmoid函数，输出的激活函数为tanh函数。他们的导数分别为：</p><div class="md-section-divider"></div><p data-anchor-id="alu9"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processed" role="textbox" aria-readonly="true" style="text-align: center;"><span class="MathJax_SVG" id="MathJax-Element-76-Frame" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -3934.284785049353 44270.673387096766 7368.569570098706" style="width: 102.823ex; height: 17.056ex; vertical-align: -8.105ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><g transform="translate(16989,0)"><g transform="translate(-15,0)"><g transform="translate(1725,2571)"><use href="#MJMATHI-3C3"></use><use href="#MJMAIN-28" x="572" y="0"></use><use href="#MJMATHI-7A" x="962" y="0"></use><use href="#MJMAIN-29" x="1430" y="0"></use></g><g transform="translate(1430,693)"><use href="#MJMATHI-3C3"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2032" x="810" y="583"></use><use href="#MJMAIN-28" x="867" y="0"></use><use href="#MJMATHI-7A" x="1257" y="0"></use><use href="#MJMAIN-29" x="1725" y="0"></use></g><g transform="translate(294,-1211)"><use href="#MJMAIN-74"></use><use href="#MJMAIN-61" x="389" y="0"></use><use href="#MJMAIN-6E" x="890" y="0"></use><use href="#MJMAIN-68" x="1446" y="0"></use><use href="#MJMAIN-28" x="2003" y="0"></use><use href="#MJMATHI-7A" x="2392" y="0"></use><use href="#MJMAIN-29" x="2861" y="0"></use></g><g transform="translate(0,-3165)"><use href="#MJMAIN-74"></use><use href="#MJMAIN-61" x="389" y="0"></use><use href="#MJMAIN-6E" x="890" y="0"></use><use href="#MJMAIN-68" x="1446" y="0"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-2032" 
x="2832" y="598"></use><use href="#MJMAIN-28" x="2297" y="0"></use><use href="#MJMATHI-7A" x="2687" y="0"></use><use href="#MJMAIN-29" x="3155" y="0"></use></g></g><g transform="translate(3808,0)"><g transform="translate(0,2571)"><use href="#MJMAIN-3D"></use><use href="#MJMATHI-79" x="1056" y="0"></use><use href="#MJMAIN-3D" x="1831" y="0"></use><g transform="translate(3007,0)"><rect stroke="none" width="3291" height="60" x="0" y="220"></rect><use href="#MJMAIN-31" x="1395" y="676"></use><g transform="translate(60,-686)"><use href="#MJMAIN-31"></use><use href="#MJMAIN-2B" x="722" y="0"></use><g transform="translate(1723,0)"><use href="#MJMATHI-65"></use><g transform="translate(466,288)"><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-7A" x="778" y="0"></use></g></g></g></g></g><g transform="translate(0,693)"><use href="#MJMAIN-3D"></use><use href="#MJMATHI-79" x="1056" y="0"></use><use href="#MJMAIN-28" x="1553" y="0"></use><use href="#MJMAIN-31" x="1943" y="0"></use><use href="#MJMAIN-2212" x="2666" y="0"></use><use href="#MJMATHI-79" x="3666" y="0"></use><use href="#MJMAIN-29" x="4164" y="0"></use></g><g transform="translate(0,-1211)"><use href="#MJMAIN-3D"></use><use href="#MJMATHI-79" x="1056" y="0"></use><use href="#MJMAIN-3D" x="1831" y="0"></use><g transform="translate(3007,0)"><rect stroke="none" width="3688" height="60" x="0" y="220"></rect><g transform="translate(60,676)"><use href="#MJMATHI-65"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-7A" x="659" y="513"></use><use href="#MJMAIN-2212" x="1120" y="0"></use><g transform="translate(2120,0)"><use href="#MJMATHI-65"></use><g transform="translate(466,362)"><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-7A" x="778" y="0"></use></g></g></g><g transform="translate(60,-686)"><use href="#MJMATHI-65"></use><use 
transform="scale(0.7071067811865476)" href="#MJMATHI-7A" x="659" y="408"></use><use href="#MJMAIN-2B" x="1120" y="0"></use><g transform="translate(2120,0)"><use href="#MJMATHI-65"></use><g transform="translate(466,288)"><use transform="scale(0.7071067811865476)" href="#MJMAIN-2212"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-7A" x="778" y="0"></use></g></g></g></g></g><g transform="translate(0,-3165)"><use href="#MJMAIN-3D"></use><use href="#MJMAIN-31" x="1056" y="0"></use><use href="#MJMAIN-2212" x="1779" y="0"></use><g transform="translate(2779,0)"><use href="#MJMATHI-79"></use><use transform="scale(0.7071067811865476)" href="#MJMAIN-32" x="706" y="583"></use></g></g></g></g><g transform="translate(41690,0)"><g transform="translate(500,2571)"><g id="mjx-eqn-8"><use href="#MJMAIN-28"></use><use href="#MJMAIN-38" x="389" y="0"></use><use href="#MJMAIN-29" x="890" y="0"></use></g></g><g transform="translate(500,693)"><g id="mjx-eqn-9"><use href="#MJMAIN-28"></use><use href="#MJMAIN-39" x="389" y="0"></use><use href="#MJMAIN-29" x="890" y="0"></use></g></g><g transform="translate(0,-1211)"><g id="mjx-eqn-10"><use href="#MJMAIN-28"></use><use href="#MJMAIN-31" x="389" y="0"></use><use href="#MJMAIN-30" x="890" y="0"></use><use href="#MJMAIN-29" x="1390" y="0"></use></g></g><g transform="translate(0,-3165)"><g id="mjx-eqn-11"><use href="#MJMAIN-28"></use><use href="#MJMAIN-31" x="389" y="0"></use><use href="#MJMAIN-31" x="890" y="0"></use><use href="#MJMAIN-29" x="1390" y="0"></use></g></g></g></g></svg></span></div><script type="math/tex; mode=display" id="MathJax-Element-76">
\begin{align}
\sigma(z)&=y=\frac{1}{1+e^{-z}}\\
\sigma'(z)&=y(1-y)\\
\tanh(z)&=y=\frac{e^z-e^{-z}}{e^z+e^{-z}}\\
\tanh'(z)&=1-y^2
\end{align}
</script><p></p><p data-anchor-id="9gpm">从上面可以看出，sigmoid和tanh函数的导数都是原函数的函数。这样，我们一旦计算原函数的值，就可以用它来计算出导数的值。</p><p data-anchor-id="eas6">LSTM需要学习的参数共有8组，分别是：遗忘门的权重矩阵<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-77-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1433.7622830431944 1022.8383231781165" style="width: 3.387ex; height: 2.419ex; vertical-align: -0.847ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-66" x="1335" y="-219"></use></g></svg></span><script type="math/tex" id="MathJax-Element-77">W_f</script>和偏置项<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-78-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -714.2000693679926 1128.7622830431944 1033.8383231781165" style="width: 2.661ex; height: 2.419ex; vertical-align: -0.847ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-62"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-66" x="904" y="-219"></use></g></svg></span><script type="math/tex" id="MathJax-Element-78">\mathbf{b}_f</script>、输入门的权重矩阵<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-79-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1288.805392899952 880.8854201102238" style="width: 3.024ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 
-1 0 0)"><use href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-69" x="1335" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-79">W_i</script>和偏置项<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-80-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -714.2000693679926 983.8053928999522 891.8854201102238" style="width: 2.298ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-62"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-69" x="904" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-80">\mathbf{b}_i</script>、输出门的权重矩阵<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-81-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1387.8003422660688 880.8854201102238" style="width: 3.266ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-6F" x="1335" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-81">W_o</script>和偏置项<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-82-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -714.2000693679926 1082.8003422660688 891.8854201102238" style="width: 2.54ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" 
fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-62"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-6F" x="904" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-82">\mathbf{b}_o</script>，以及计算单元状态的权重矩阵<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-83-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1351.0307896443683 881.5925268914102" style="width: 3.145ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-63" x="1335" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-83">W_c</script>和偏置项<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-84-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -714.2000693679926 1046.0307896443683 892.5925268914102" style="width: 2.419ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAINB-62"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-63" x="904" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-84">\mathbf{b}_c</script>。因为权重矩阵的两部分在反向传播中使用不同的公式，因此在后续的推导中，权重矩阵<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-85-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1433.7622830431944 1022.8383231781165" 
style="width: 3.387ex; height: 2.419ex; vertical-align: -0.847ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-66" x="1335" y="-219"></use></g></svg></span><script type="math/tex" id="MathJax-Element-85">W_f</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-86-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1288.805392899952 880.8854201102238" style="width: 3.024ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-69" x="1335" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-86">W_i</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-87-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1351.0307896443683 881.5925268914102" style="width: 3.145ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-63" x="1335" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-87">W_c</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-88-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 
1387.8003422660688 880.8854201102238" style="width: 3.266ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-6F" x="1335" y="-213"></use></g></svg></span><script type="math/tex" id="MathJax-Element-88">W_o</script>都将被写为分开的两个矩阵：<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-89-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1841.409342397239 1022.8383231781165" style="width: 4.234ex; height: 2.419ex; vertical-align: -0.847ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><g transform="translate(944,-155)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-66"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-68" x="550" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-89">W_{fh}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-90-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1838.5809152724928 1022.8383231781165" style="width: 4.234ex; height: 2.419ex; vertical-align: -0.847ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><g transform="translate(944,-155)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-66"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-78" x="550" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-90">W_{fx}</script>、<span 
class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-91-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1696.4524522539969 880.8854201102238" style="width: 3.992ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><g transform="translate(944,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-68" x="345" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-91">W_{ih}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-92-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1693.6240251292506 880.8854201102238" style="width: 3.992ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><g transform="translate(944,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-69"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-78" x="345" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-92">W_{ix}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-93-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1795.4474016201136 880.8854201102238" style="width: 4.113ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" 
stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><g transform="translate(944,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-6F"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-68" x="485" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-93">W_{oh}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-94-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1792.6189744953674 880.8854201102238" style="width: 4.113ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><g transform="translate(944,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-6F"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-78" x="485" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-94">W_{ox}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-95-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1758.677848998413 881.5925268914102" style="width: 4.113ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><g transform="translate(944,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-63"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-68" x="433" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-95">W_{ch}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" 
id="MathJax-Element-96-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -703.2000693679926 1755.8494218736669 881.5925268914102" style="width: 4.113ex; height: 2.056ex; vertical-align: -0.484ex; margin: 1px 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMATHI-57"></use><g transform="translate(944,-150)"><use transform="scale(0.7071067811865476)" href="#MJMATHI-63"></use><use transform="scale(0.7071067811865476)" href="#MJMATHI-78" x="433" y="0"></use></g></g></svg></span><script type="math/tex" id="MathJax-Element-96">W_{cx}</script>。</p><p data-anchor-id="0r8f">我们解释一下按元素乘<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-97-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><span style="display: inline-block; white-space: nowrap; padding: 1px 0px;"><span style="display: inline-block; position: relative; width: 1.21ex; height: 0.968ex; vertical-align: 0.121ex;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.20006936799257 500.5 483.40013873598514" style="width: 1.21ex; height: 1.089ex; position: absolute; bottom: -0.121ex; left: 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAIN-2218"></use></g></svg></span></span></span><script type="math/tex" id="MathJax-Element-97">\circ</script>符号。当<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processed" id="MathJax-Element-98-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"><span style="display: inline-block; white-space: nowrap; padding: 1px 0px;"><span style="display: inline-block; position: relative; width: 1.21ex; height: 0.968ex; vertical-align: 0.121ex;"><svg xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 -464.20006936799257 500.5 
483.40013873598514" style="width: 1.21ex; height: 1.089ex; position: absolute; bottom: -0.121ex; left: 0px;"><g stroke="black" fill="black" stroke-width="0" transform="matrix(1 0 0 -1 0 0)"><use href="#MJMAIN-2218"></use></g></svg></span></span></span><script type="math/tex" id="MathJax-Element-98">\circ</script>作用于两个<strong>向量</strong>时，运算如下：</p><div class="md-section-divider"></div><p data-anchor-id="1rvs"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-99-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-99">
\mathbf{a}\circ\mathbf{b}=\begin{bmatrix}
a_1\\a_2\\a_3\\...\\a_n
\end{bmatrix}\circ\begin{bmatrix}
b_1\\b_2\\b_3\\...\\b_n
\end{bmatrix}=\begin{bmatrix}
a_1b_1\\a_2b_2\\a_3b_3\\...\\a_nb_n
\end{bmatrix}
</script><p></p><p data-anchor-id="gzxw">当<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-100-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-100">\circ</script>作用于一个<strong>向量</strong>和一个<strong>矩阵</strong>时，运算如下：</p><div class="md-section-divider"></div><p data-anchor-id="q9ib"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-101-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-101">
\begin{align}
\mathbf{a}\circ X&=\begin{bmatrix}
a_1\\a_2\\a_3\\...\\a_n
\end{bmatrix}\circ\begin{bmatrix}
x_{11} & x_{12} & x_{13} & ... & x_{1n}\\
x_{21} & x_{22} & x_{23} & ... & x_{2n}\\
x_{31} & x_{32} & x_{33} & ... & x_{3n}\\
& & ...\\
x_{n1} & x_{n2} & x_{n3} & ... & x_{nn}\\
\end{bmatrix}\\
&=\begin{bmatrix}
a_1x_{11} & a_1x_{12} & a_1x_{13} & ... & a_1x_{1n}\\
a_2x_{21} & a_2x_{22} & a_2x_{23} & ... & a_2x_{2n}\\
a_3x_{31} & a_3x_{32} & a_3x_{33} & ... & a_3x_{3n}\\
& & ...\\
a_nx_{n1} & a_nx_{n2} & a_nx_{n3} & ... & a_nx_{nn}\\
\end{bmatrix}
\end{align}
</script><p></p><p data-anchor-id="r40q">当<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-102-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-102">\circ</script>作用于两个<strong>矩阵</strong>时，两个矩阵对应位置的元素相乘。按元素乘可以在某些情况下简化矩阵和向量运算。例如，当一个对角矩阵右乘一个矩阵时，相当于用对角矩阵的对角线组成的向量按元素乘那个矩阵：</p><div class="md-section-divider"></div><p data-anchor-id="txtg"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-103-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-103">
diag[\mathbf{a}]X=\mathbf{a}\circ X
</script><p></p><p data-anchor-id="nh1h">当一个行向量右乘一个对角矩阵时，相当于这个行向量按元素乘那个矩阵对角线组成的向量：</p><div class="md-section-divider"></div><p data-anchor-id="x3wo"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-104-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-104">
\mathbf{a}^Tdiag[\mathbf{b}]=(\mathbf{a}\circ\mathbf{b})^T
</script><p></p><p data-anchor-id="5c26">上面这两点，在我们后续推导中会多次用到。</p><p data-anchor-id="oqlf">在t时刻，LSTM的输出值为<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-105-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-105">\mathbf{h}_t</script>。我们定义t时刻的误差项<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-106-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-106">\delta_t</script>为：</p><div class="md-section-divider"></div><p data-anchor-id="x2xl"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-107-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-107">
\delta_t\overset{def}{=}\frac{\partial{E}}{\partial{\mathbf{h}_t}}
</script><p></p><p data-anchor-id="n3iu">注意，和前面几篇文章不同，我们这里假设误差项是损失函数对输出值的导数，而不是对加权输入<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-108-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-108">net_t^l</script>的导数。因为LSTM有四个加权输入，分别对应<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-109-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-109">\mathbf{f}_t</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-110-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-110">\mathbf{i}_t</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-111-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-111">\mathbf{c}_t</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-112-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-112">\mathbf{o}_t</script>，我们希望往上一层传递一个误差项而不是四个。但我们仍然需要定义出这四个加权输入，以及他们对应的误差项。</p><div class="md-section-divider"></div><p data-anchor-id="omdq"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-113-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-113">
\begin{align}
\mathbf{net}_{f,t}&=W_f[\mathbf{h}_{t-1},\mathbf{x}_t]+\mathbf{b}_f\\
&=W_{fh}\mathbf{h}_{t-1}+W_{fx}\mathbf{x}_t+\mathbf{b}_f\\
\mathbf{net}_{i,t}&=W_i[\mathbf{h}_{t-1},\mathbf{x}_t]+\mathbf{b}_i\\
&=W_{ih}\mathbf{h}_{t-1}+W_{ix}\mathbf{x}_t+\mathbf{b}_i\\
\mathbf{net}_{\tilde{c},t}&=W_c[\mathbf{h}_{t-1},\mathbf{x}_t]+\mathbf{b}_c\\
&=W_{ch}\mathbf{h}_{t-1}+W_{cx}\mathbf{x}_t+\mathbf{b}_c\\
\mathbf{net}_{o,t}&=W_o[\mathbf{h}_{t-1},\mathbf{x}_t]+\mathbf{b}_o\\
&=W_{oh}\mathbf{h}_{t-1}+W_{ox}\mathbf{x}_t+\mathbf{b}_o\\
\delta_{f,t}&\overset{def}{=}\frac{\partial{E}}{\partial{\mathbf{net}_{f,t}}}\\
\delta_{i,t}&\overset{def}{=}\frac{\partial{E}}{\partial{\mathbf{net}_{i,t}}}\\
\delta_{\tilde{c},t}&\overset{def}{=}\frac{\partial{E}}{\partial{\mathbf{net}_{\tilde{c},t}}}\\
\delta_{o,t}&\overset{def}{=}\frac{\partial{E}}{\partial{\mathbf{net}_{o,t}}}\\
\end{align}
</script><p></p><div class="md-section-divider"></div><h3 data-anchor-id="3gn9" id="误差项沿时间的反向传递">误差项沿时间的反向传递</h3><p data-anchor-id="nmdu">沿时间反向传递误差项，就是要计算出t-1时刻的误差项<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-114-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-114">\delta_{t-1}</script>。</p><div class="md-section-divider"></div><p data-anchor-id="q36y"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-115-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-115">
\begin{align}
\delta_{t-1}^T&=\frac{\partial{E}}{\partial{\mathbf{h_{t-1}}}}\\
&=\frac{\partial{E}}{\partial{\mathbf{h_t}}}\frac{\partial{\mathbf{h_t}}}{\partial{\mathbf{h_{t-1}}}}\\
&=\delta_{t}^T\frac{\partial{\mathbf{h_t}}}{\partial{\mathbf{h_{t-1}}}}
\end{align}
</script><p></p><p data-anchor-id="5dlw">我们知道，<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-116-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-116">\frac{\partial{\mathbf{h_t}}}{\partial{\mathbf{h_{t-1}}}}</script>是一个Jacobian矩阵。如果隐藏层h的维度是N的话，那么它就是一个<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-117-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-117">N\times N</script>矩阵。为了求出它，我们列出<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-118-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-118">\mathbf{h}_t</script>的计算公式，即前面的<strong>式6</strong>和<strong>式4</strong>：</p><div class="md-section-divider"></div><p data-anchor-id="s97a"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-119-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-119">
\begin{align}
\mathbf{h}_t&=\mathbf{o}_t\circ \tanh(\mathbf{c}_t)\\
\mathbf{c}_t&=\mathbf{f}_t\circ\mathbf{c}_{t-1}+\mathbf{i}_t\circ\mathbf{\tilde{c}}_t
\end{align}
</script><p></p><p data-anchor-id="tr0o">显然，<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-120-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-120">\mathbf{o}_t</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-121-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-121">\mathbf{f}_t</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-122-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-122">\mathbf{i}_t</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-123-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-123">\mathbf{\tilde{c}}_t</script>都是<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-124-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-124">\mathbf{h}_{t-1}</script>的函数，那么，利用全导数公式可得：</p><div class="md-section-divider"></div><p data-anchor-id="p2ha"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-125-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-125">
\begin{align}
\delta_t^T\frac{\partial{\mathbf{h_t}}}{\partial{\mathbf{h_{t-1}}}}&=\delta_t^T\frac{\partial{\mathbf{h_t}}}{\partial{\mathbf{o}_t}}\frac{\partial{\mathbf{o}_t}}{\partial{\mathbf{net}_{o,t}}}\frac{\partial{\mathbf{net}_{o,t}}}{\partial{\mathbf{h_{t-1}}}}
+\delta_t^T\frac{\partial{\mathbf{h_t}}}{\partial{\mathbf{c}_t}}\frac{\partial{\mathbf{c}_t}}{\partial{\mathbf{f_{t}}}}\frac{\partial{\mathbf{f}_t}}{\partial{\mathbf{net}_{f,t}}}\frac{\partial{\mathbf{net}_{f,t}}}{\partial{\mathbf{h_{t-1}}}}
+\delta_t^T\frac{\partial{\mathbf{h_t}}}{\partial{\mathbf{c}_t}}\frac{\partial{\mathbf{c}_t}}{\partial{\mathbf{i_{t}}}}\frac{\partial{\mathbf{i}_t}}{\partial{\mathbf{net}_{i,t}}}\frac{\partial{\mathbf{net}_{i,t}}}{\partial{\mathbf{h_{t-1}}}}
+\delta_t^T\frac{\partial{\mathbf{h_t}}}{\partial{\mathbf{c}_t}}\frac{\partial{\mathbf{c}_t}}{\partial{\mathbf{\tilde{c}}_{t}}}\frac{\partial{\mathbf{\tilde{c}}_t}}{\partial{\mathbf{net}_{\tilde{c},t}}}\frac{\partial{\mathbf{net}_{\tilde{c},t}}}{\partial{\mathbf{h_{t-1}}}}\\
&=\delta_{o,t}^T\frac{\partial{\mathbf{net}_{o,t}}}{\partial{\mathbf{h_{t-1}}}}
+\delta_{f,t}^T\frac{\partial{\mathbf{net}_{f,t}}}{\partial{\mathbf{h_{t-1}}}}
+\delta_{i,t}^T\frac{\partial{\mathbf{net}_{i,t}}}{\partial{\mathbf{h_{t-1}}}}
+\delta_{\tilde{c},t}^T\frac{\partial{\mathbf{net}_{\tilde{c},t}}}{\partial{\mathbf{h_{t-1}}}}\qquad\quad(式7)
\end{align}
</script><p></p><p data-anchor-id="kj1h">下面，我们要把<strong>式7</strong>中的每个偏导数都求出来。根据<strong>式6</strong>，我们可以求出：</p><div class="md-section-divider"></div><p data-anchor-id="2per"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-126-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-126">
\begin{align}
\frac{\partial{\mathbf{h_t}}}{\partial{\mathbf{o}_t}}&=diag[\tanh(\mathbf{c}_t)]\\
\frac{\partial{\mathbf{h_t}}}{\partial{\mathbf{c}_t}}&=diag[\mathbf{o}_t\circ(1-\tanh(\mathbf{c}_t)^2)]
\end{align}
</script><p></p><p data-anchor-id="0yv6">根据<strong>式4</strong>，我们可以求出：</p><div class="md-section-divider"></div><p data-anchor-id="tpun"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-127-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-127">
\begin{align}
\frac{\partial{\mathbf{c}_t}}{\partial{\mathbf{f_{t}}}}&=diag[\mathbf{c}_{t-1}]\\
\frac{\partial{\mathbf{c}_t}}{\partial{\mathbf{i_{t}}}}&=diag[\mathbf{\tilde{c}}_t]\\
\frac{\partial{\mathbf{c}_t}}{\partial{\mathbf{\tilde{c}_{t}}}}&=diag[\mathbf{i}_t]\\
\end{align}
</script><p></p><p data-anchor-id="l271">因为：</p><div class="md-section-divider"></div><p data-anchor-id="icp4"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-128-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-128">
\begin{align}
\mathbf{o}_t&=\sigma(\mathbf{net}_{o,t})\\
\mathbf{net}_{o,t}&=W_{oh}\mathbf{h}_{t-1}+W_{ox}\mathbf{x}_t+\mathbf{b}_o\\\\
\mathbf{f}_t&=\sigma(\mathbf{net}_{f,t})\\
\mathbf{net}_{f,t}&=W_{fh}\mathbf{h}_{t-1}+W_{fx}\mathbf{x}_t+\mathbf{b}_f\\\\
\mathbf{i}_t&=\sigma(\mathbf{net}_{i,t})\\
\mathbf{net}_{i,t}&=W_{ih}\mathbf{h}_{t-1}+W_{ix}\mathbf{x}_t+\mathbf{b}_i\\\\
\mathbf{\tilde{c}}_t&=\tanh(\mathbf{net}_{\tilde{c},t})\\
\mathbf{net}_{\tilde{c},t}&=W_{ch}\mathbf{h}_{t-1}+W_{cx}\mathbf{x}_t+\mathbf{b}_c\\
\end{align}
</script><p></p><p data-anchor-id="q20b">我们很容易得出：</p><div class="md-section-divider"></div><p data-anchor-id="9n5v"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-129-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-129">
\begin{align}
\frac{\partial{\mathbf{o}_t}}{\partial{\mathbf{net}_{o,t}}}&=diag[\mathbf{o}_t\circ(1-\mathbf{o}_t)]\\
\frac{\partial{\mathbf{net}_{o,t}}}{\partial{\mathbf{h_{t-1}}}}&=W_{oh}\\
\frac{\partial{\mathbf{f}_t}}{\partial{\mathbf{net}_{f,t}}}&=diag[\mathbf{f}_t\circ(1-\mathbf{f}_t)]\\
\frac{\partial{\mathbf{net}_{f,t}}}{\partial{\mathbf{h}_{t-1}}}&=W_{fh}\\
\frac{\partial{\mathbf{i}_t}}{\partial{\mathbf{net}_{i,t}}}&=diag[\mathbf{i}_t\circ(1-\mathbf{i}_t)]\\
\frac{\partial{\mathbf{net}_{i,t}}}{\partial{\mathbf{h}_{t-1}}}&=W_{ih}\\
\frac{\partial{\mathbf{\tilde{c}}_t}}{\partial{\mathbf{net}_{\tilde{c},t}}}&=diag[1-\mathbf{\tilde{c}}_t^2]\\
\frac{\partial{\mathbf{net}_{\tilde{c},t}}}{\partial{\mathbf{h}_{t-1}}}&=W_{ch}
\end{align}
</script><p></p><p data-anchor-id="jqdd">将上述偏导数带入到<strong>式7</strong>，我们得到：</p><div class="md-section-divider"></div><p data-anchor-id="0h91"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-130-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-130">
\begin{align}
\delta_{t-1}&=\delta_{o,t}^T\frac{\partial{\mathbf{net}_{o,t}}}{\partial{\mathbf{h_{t-1}}}}
+\delta_{f,t}^T\frac{\partial{\mathbf{net}_{f,t}}}{\partial{\mathbf{h_{t-1}}}}
+\delta_{i,t}^T\frac{\partial{\mathbf{net}_{i,t}}}{\partial{\mathbf{h_{t-1}}}}
+\delta_{\tilde{c},t}^T\frac{\partial{\mathbf{net}_{\tilde{c},t}}}{\partial{\mathbf{h_{t-1}}}}\\
&=\delta_{o,t}^T W_{oh}
+\delta_{f,t}^TW_{fh}
+\delta_{i,t}^TW_{ih}
+\delta_{\tilde{c},t}^TW_{ch}\qquad\quad(式8)\\
\end{align}
</script><p></p><p data-anchor-id="h95a">根据<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-131-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-131">\delta_{o,t}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-132-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-132">\delta_{f,t}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-133-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-133">\delta_{i,t}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-134-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-134">\delta_{\tilde{c},t}</script>的定义，可知：</p><div class="md-section-divider"></div><p data-anchor-id="02a5"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-135-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-135">
\begin{align}
\delta_{o,t}^T&=\delta_t^T\circ\tanh(\mathbf{c}_t)\circ\mathbf{o}_t\circ(1-\mathbf{o}_t)\qquad\quad(式9)\\
\delta_{f,t}^T&=\delta_t^T\circ\mathbf{o}_t\circ(1-\tanh(\mathbf{c}_t)^2)\circ\mathbf{c}_{t-1}\circ\mathbf{f}_t\circ(1-\mathbf{f}_t)\qquad(式10)\\
\delta_{i,t}^T&=\delta_t^T\circ\mathbf{o}_t\circ(1-\tanh(\mathbf{c}_t)^2)\circ\mathbf{\tilde{c}}_t\circ\mathbf{i}_t\circ(1-\mathbf{i}_t)\qquad\quad(式11)\\
\delta_{\tilde{c},t}^T&=\delta_t^T\circ\mathbf{o}_t\circ(1-\tanh(\mathbf{c}_t)^2)\circ\mathbf{i}_t\circ(1-\mathbf{\tilde{c}}_t^2)\qquad\quad(式12)\\
\end{align}
</script><p></p><p data-anchor-id="19nm"><strong>式8</strong>到<strong>式12</strong>就是将误差沿时间反向传播一个时刻的公式。有了它，我们可以写出将误差项向前传递到任意k时刻的公式：</p><div class="md-section-divider"></div><p data-anchor-id="kvn9"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-136-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-136">
\delta_k^T=\prod_{j=k}^{t-1}\left(\delta_{o,j}^TW_{oh}
+\delta_{f,j}^TW_{fh}
+\delta_{i,j}^TW_{ih}
+\delta_{\tilde{c},j}^TW_{ch}\right)\qquad\quad(式13)
</script><p></p><div class="md-section-divider"></div><h3 data-anchor-id="ipap" id="将误差项传递到上一层">将误差项传递到上一层</h3><p data-anchor-id="ar21">我们假设当前为第l层，定义l-1层的误差项是误差函数对l-1层<strong>加权输入</strong>的导数，即：</p><div class="md-section-divider"></div><p data-anchor-id="gfu5"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-137-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-137">
\delta_t^{l-1}\overset{def}{=}\frac{\partial{E}}{\partial{\mathbf{net}_t^{l-1}}}
</script><p></p><p data-anchor-id="02xy">本次LSTM的输入<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-138-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-138">x_t</script>由下面的公式计算：</p><div class="md-section-divider"></div><p data-anchor-id="19on"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-139-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-139">
\mathbf{x}_t^l=f^{l-1}(\mathbf{net}_t^{l-1})
</script><p></p><p data-anchor-id="qrzr">上式中，<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-140-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-140">f^{l-1}</script>表示第l-1层的<strong>激活函数</strong>。</p><p data-anchor-id="389r">因为<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-141-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-141">\mathbf{net}_{f,t}^l</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-142-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-142">\mathbf{net}_{i,t}^l</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-143-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-143">\mathbf{net}_{\tilde{c},t}^l</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-144-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-144">\mathbf{net}_{o,t}^l</script>都是<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-145-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-145">\mathbf{x}_t</script>的函数，<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-146-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; 
display: inline-block;"></span><script type="math/tex" id="MathJax-Element-146">\mathbf{x}_t</script>又是<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-147-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-147">\mathbf{net}_t^{l-1}</script>的函数，因此，要求出E对<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-148-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-148">\mathbf{net}_t^{l-1}</script>的导数，就需要使用全导数公式：</p><div class="md-section-divider"></div><p data-anchor-id="hgeb"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-149-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-149">
\begin{align}
\frac{\partial{E}}{\partial{\mathbf{net}_t^{l-1}}}&=\frac{\partial{E}}{\partial{\mathbf{\mathbf{net}_{f,t}^l}}}\frac{\partial{\mathbf{\mathbf{net}_{f,t}^l}}}{\partial{\mathbf{x}_t^l}}\frac{\partial{\mathbf{x}_t^l}}{\partial{\mathbf{\mathbf{net}_t^{l-1}}}}
+\frac{\partial{E}}{\partial{\mathbf{\mathbf{net}_{i,t}^l}}}\frac{\partial{\mathbf{\mathbf{net}_{i,t}^l}}}{\partial{\mathbf{x}_t^l}}\frac{\partial{\mathbf{x}_t^l}}{\partial{\mathbf{\mathbf{net}_t^{l-1}}}}
+\frac{\partial{E}}{\partial{\mathbf{\mathbf{net}_{\tilde{c},t}^l}}}\frac{\partial{\mathbf{\mathbf{net}_{\tilde{c},t}^l}}}{\partial{\mathbf{x}_t^l}}\frac{\partial{\mathbf{x}_t^l}}{\partial{\mathbf{\mathbf{net}_t^{l-1}}}}
+\frac{\partial{E}}{\partial{\mathbf{\mathbf{net}_{o,t}^l}}}\frac{\partial{\mathbf{\mathbf{net}_{o,t}^l}}}{\partial{\mathbf{x}_t^l}}\frac{\partial{\mathbf{x}_t^l}}{\partial{\mathbf{\mathbf{net}_t^{l-1}}}}\\
&=\delta_{f,t}^TW_{fx}\circ f'(\mathbf{net}_t^{l-1})+\delta_{i,t}^TW_{ix}\circ f'(\mathbf{net}_t^{l-1})+\delta_{\tilde{c},t}^TW_{cx}\circ f'(\mathbf{net}_t^{l-1})+\delta_{o,t}^TW_{ox}\circ f'(\mathbf{net}_t^{l-1})\\
&=(\delta_{f,t}^TW_{fx}+\delta_{i,t}^TW_{ix}+\delta_{\tilde{c},t}^TW_{cx}+\delta_{o,t}^TW_{ox})\circ f'(\mathbf{net}_t^{l-1})\qquad\quad(式14)
\end{align}
</script><p></p><p data-anchor-id="cw5j"><strong>式14</strong>就是将误差传递到上一层的公式。</p><div class="md-section-divider"></div><h3 data-anchor-id="lqiu" id="权重梯度的计算">权重梯度的计算</h3><p data-anchor-id="zrlu">对于<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-150-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-150">W_{fh}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-151-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-151">W_{ih}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-152-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-152">W_{ch}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-153-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-153">W_{oh}</script>的权重梯度，我们知道它的梯度是各个时刻梯度之和（证明过程请参考文章<a href="https://zybuluo.com/hanbingtao/note/541458" target="_blank">零基础入门深度学习(5) - 循环神经网络</a>），我们首先求出它们在t时刻的梯度，然后再求出他们最终的梯度。</p><p data-anchor-id="zjug">我们已经求得了误差项<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-154-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-154">\delta_{o,t}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-155-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" 
id="MathJax-Element-155">\delta_{f,t}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-156-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-156">\delta_{i,t}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-157-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-157">\delta_{\tilde{c},t}</script>，很容易求出t时刻的<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-158-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-158">W_{oh}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-159-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-159">W_{ih}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-160-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-160">W_{fh}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-161-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-161">W_{ch}</script>：</p><div class="md-section-divider"></div><p data-anchor-id="ws81"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-162-Frame" style="font-size: 100%; 
display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-162">
\begin{align}
\frac{\partial{E}}{\partial{W_{oh,t}}}&=\frac{\partial{E}}{\partial{\mathbf{net}_{o,t}}}\frac{\partial{\mathbf{net}_{o,t}}}{\partial{W_{oh,t}}}\\
&=\delta_{o,t}\mathbf{h}_{t-1}^T\\\\
\frac{\partial{E}}{\partial{W_{fh,t}}}&=\frac{\partial{E}}{\partial{\mathbf{net}_{f,t}}}\frac{\partial{\mathbf{net}_{f,t}}}{\partial{W_{fh,t}}}\\
&=\delta_{f,t}\mathbf{h}_{t-1}^T\\\\
\frac{\partial{E}}{\partial{W_{ih,t}}}&=\frac{\partial{E}}{\partial{\mathbf{net}_{i,t}}}\frac{\partial{\mathbf{net}_{i,t}}}{\partial{W_{ih,t}}}\\
&=\delta_{i,t}\mathbf{h}_{t-1}^T\\\\
\frac{\partial{E}}{\partial{W_{ch,t}}}&=\frac{\partial{E}}{\partial{\mathbf{net}_{\tilde{c},t}}}\frac{\partial{\mathbf{net}_{\tilde{c},t}}}{\partial{W_{ch,t}}}\\
&=\delta_{\tilde{c},t}\mathbf{h}_{t-1}^T\\
\end{align}
</script><p></p><p data-anchor-id="y3es">将各个时刻的梯度加在一起，就能得到最终的梯度：</p><div class="md-section-divider"></div><p data-anchor-id="28hu"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-163-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-163">
\begin{align}
\frac{\partial{E}}{\partial{W_{oh}}}&=\sum_{j=1}^t\delta_{o,j}\mathbf{h}_{j-1}^T\\
\frac{\partial{E}}{\partial{W_{fh}}}&=\sum_{j=1}^t\delta_{f,j}\mathbf{h}_{j-1}^T\\
\frac{\partial{E}}{\partial{W_{ih}}}&=\sum_{j=1}^t\delta_{i,j}\mathbf{h}_{j-1}^T\\
\frac{\partial{E}}{\partial{W_{ch}}}&=\sum_{j=1}^t\delta_{\tilde{c},j}\mathbf{h}_{j-1}^T\\
\end{align}
</script><p></p><p data-anchor-id="xilj">对于偏置项<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-164-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-164">\mathbf{b}_f</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-165-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-165">\mathbf{b}_i</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-166-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-166">\mathbf{b}_c</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-167-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-167">\mathbf{b}_o</script>的梯度，也是将各个时刻的梯度加在一起。下面是各个时刻的偏置项梯度：</p><div class="md-section-divider"></div><p data-anchor-id="kb1x"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-168-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-168">
\begin{align}
\frac{\partial{E}}{\partial{\mathbf{b}_{o,t}}}&=\frac{\partial{E}}{\partial{\mathbf{net}_{o,t}}}\frac{\partial{\mathbf{net}_{o,t}}}{\partial{\mathbf{b}_{o,t}}}\\
&=\delta_{o,t}\\\\
\frac{\partial{E}}{\partial{\mathbf{b}_{f,t}}}&=\frac{\partial{E}}{\partial{\mathbf{net}_{f,t}}}\frac{\partial{\mathbf{net}_{f,t}}}{\partial{\mathbf{b}_{f,t}}}\\
&=\delta_{f,t}\\\\
\frac{\partial{E}}{\partial{\mathbf{b}_{i,t}}}&=\frac{\partial{E}}{\partial{\mathbf{net}_{i,t}}}\frac{\partial{\mathbf{net}_{i,t}}}{\partial{\mathbf{b}_{i,t}}}\\
&=\delta_{i,t}\\\\
\frac{\partial{E}}{\partial{\mathbf{b}_{c,t}}}&=\frac{\partial{E}}{\partial{\mathbf{net}_{\tilde{c},t}}}\frac{\partial{\mathbf{net}_{\tilde{c},t}}}{\partial{\mathbf{b}_{c,t}}}\\
&=\delta_{\tilde{c},t}\\
\end{align}
</script><p></p><p data-anchor-id="g5h3">下面是最终的偏置项梯度，即将各个时刻的偏置项梯度加在一起：</p><div class="md-section-divider"></div><p data-anchor-id="y8hw"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-169-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-169">
\begin{align}
\frac{\partial{E}}{\partial{\mathbf{b}_o}}&=\sum_{j=1}^t\delta_{o,j}\\
\frac{\partial{E}}{\partial{\mathbf{b}_i}}&=\sum_{j=1}^t\delta_{i,j}\\
\frac{\partial{E}}{\partial{\mathbf{b}_f}}&=\sum_{j=1}^t\delta_{f,j}\\
\frac{\partial{E}}{\partial{\mathbf{b}_c}}&=\sum_{j=1}^t\delta_{\tilde{c},j}\\
\end{align}
</script><p></p><p data-anchor-id="bs8c">对于<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-170-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-170">W_{fx}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-171-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-171">W_{ix}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-172-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-172">W_{cx}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-173-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-173">W_{ox}</script>的权重梯度，只需要根据相应的误差项直接计算即可：</p><div class="md-section-divider"></div><p data-anchor-id="qrvb"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-174-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-174">
\begin{align}
\frac{\partial{E}}{\partial{W_{ox}}}&=\frac{\partial{E}}{\partial{\mathbf{net}_{o,t}}}\frac{\partial{\mathbf{net}_{o,t}}}{\partial{W_{ox}}}\\
&=\delta_{o,t}\mathbf{x}_{t}^T\\\\
\frac{\partial{E}}{\partial{W_{fx}}}&=\frac{\partial{E}}{\partial{\mathbf{net}_{f,t}}}\frac{\partial{\mathbf{net}_{f,t}}}{\partial{W_{fx}}}\\
&=\delta_{f,t}\mathbf{x}_{t}^T\\\\
\frac{\partial{E}}{\partial{W_{ix}}}&=\frac{\partial{E}}{\partial{\mathbf{net}_{i,t}}}\frac{\partial{\mathbf{net}_{i,t}}}{\partial{W_{ix}}}\\
&=\delta_{i,t}\mathbf{x}_{t}^T\\\\
\frac{\partial{E}}{\partial{W_{cx}}}&=\frac{\partial{E}}{\partial{\mathbf{net}_{\tilde{c},t}}}\frac{\partial{\mathbf{net}_{\tilde{c},t}}}{\partial{W_{cx}}}\\
&=\delta_{\tilde{c},t}\mathbf{x}_{t}^T\\
\end{align}
</script><p></p><p data-anchor-id="h8qt">以上就是LSTM的训练算法的全部公式。因为这里面存在很多重复的模式，仔细看看，会发觉并不是太复杂。</p><p data-anchor-id="0n3g">当然，LSTM存在着相当多的变体，读者可以在互联网上找到很多资料。因为大家已经熟悉了基本LSTM的算法，因此理解这些变体比较容易，因此本文就不再赘述了。</p><div class="md-section-divider"></div><h2 data-anchor-id="d8jj" id="长短时记忆网络的实现">长短时记忆网络的实现</h2><blockquote data-anchor-id="8q81" class="white-blockquote">
  <p>完整代码请参考GitHub: <a href="https://github.com/hanbt/learn_dl/blob/master/lstm.py" target="_blank">https://github.com/hanbt/learn_dl/blob/master/lstm.py</a> (python2.7)</p>
</blockquote><p data-anchor-id="xjqf">在下面的实现中，LSTMLayer的参数包括输入维度、输出维度、隐藏层维度，单元状态维度等于隐藏层维度。gate的激活函数为sigmoid函数，输出的激活函数为tanh。</p><div class="md-section-divider"></div><h3 data-anchor-id="g3kr" id="激活函数的实现">激活函数的实现</h3><p data-anchor-id="um21">我们先实现两个激活函数：sigmoid和tanh。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="fue7" style=""><ol class="linenums"><li class="L0"><code class="language-python"><span class="kwd">class</span><span class="pln"> </span><span class="typ">SigmoidActivator</span><span class="pun">(</span><span class="pln">object</span><span class="pun">):</span></code></li><li class="L1"><code class="language-python"><span class="pln">    </span><span class="kwd">def</span><span class="pln"> forward</span><span class="pun">(</span><span class="pln">self</span><span class="pun">,</span><span class="pln"> weighted_input</span><span class="pun">):</span></code></li><li class="L2"><code class="language-python"><span class="pln">        </span><span class="kwd">return</span><span class="pln"> </span><span class="lit">1.0</span><span class="pln"> </span><span class="pun">/</span><span class="pln"> </span><span class="pun">(</span><span class="lit">1.0</span><span class="pln"> </span><span class="pun">+</span><span class="pln"> np</span><span class="pun">.</span><span class="pln">exp</span><span class="pun">(-</span><span class="pln">weighted_input</span><span class="pun">))</span></code></li><li class="L3"><code class="language-python"></code></li><li class="L4"><code class="language-python"><span class="pln">    </span><span class="kwd">def</span><span class="pln"> backward</span><span class="pun">(</span><span class="pln">self</span><span class="pun">,</span><span class="pln"> output</span><span class="pun">):</span></code></li><li class="L5"><code class="language-python"><span class="pln">        </span><span class="kwd">return</span><span class="pln"> output </span><span class="pun">*</span><span 
class="pln"> </span><span class="pun">(</span><span class="lit">1</span><span class="pln"> </span><span class="pun">-</span><span class="pln"> output</span><span class="pun">)</span></code></li><li class="L6"><code class="language-python"></code></li><li class="L7"><code class="language-python"></code></li><li class="L8"><code class="language-python"><span class="kwd">class</span><span class="pln"> </span><span class="typ">TanhActivator</span><span class="pun">(</span><span class="pln">object</span><span class="pun">):</span></code></li><li class="L9"><code class="language-python"><span class="pln">    </span><span class="kwd">def</span><span class="pln"> forward</span><span class="pun">(</span><span class="pln">self</span><span class="pun">,</span><span class="pln"> weighted_input</span><span class="pun">):</span></code></li><li class="L0"><code class="language-python"><span class="pln">        </span><span class="kwd">return</span><span class="pln"> </span><span class="lit">2.0</span><span class="pln"> </span><span class="pun">/</span><span class="pln"> </span><span class="pun">(</span><span class="lit">1.0</span><span class="pln"> </span><span class="pun">+</span><span class="pln"> np</span><span class="pun">.</span><span class="pln">exp</span><span class="pun">(-</span><span class="lit">2</span><span class="pln"> </span><span class="pun">*</span><span class="pln"> weighted_input</span><span class="pun">))</span><span class="pln"> </span><span class="pun">-</span><span class="pln"> </span><span class="lit">1.0</span></code></li><li class="L1"><code class="language-python"></code></li><li class="L2"><code class="language-python"><span class="pln">    </span><span class="kwd">def</span><span class="pln"> backward</span><span class="pun">(</span><span class="pln">self</span><span class="pun">,</span><span class="pln"> output</span><span class="pun">):</span></code></li><li class="L3"><code class="language-python"><span class="pln">        </span><span 
class="kwd">return</span><span class="pln"> </span><span class="lit">1</span><span class="pln"> </span><span class="pun">-</span><span class="pln"> output </span><span class="pun">*</span><span class="pln"> output</span></code></li></ol></pre><div class="md-section-divider"></div><h3 data-anchor-id="863d" id="lstm初始化">LSTM初始化</h3><p data-anchor-id="nv3u">和前两篇文章代码架构一样，我们把LSTM的实现放在LstmLayer类中。</p><p data-anchor-id="dksl">根据LSTM前向计算和反向传播算法，我们需要初始化一系列矩阵和向量。这些矩阵和向量有两类用途，一类是用于保存模型参数，例如<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-175-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-175">W_f</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-176-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-176">W_i</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-177-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-177">W_o</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-178-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-178">W_c</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-179-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-179">\mathbf{b}_f</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-180-Frame" role="textbox" aria-readonly="true" 
style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-180">\mathbf{b}_i</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-181-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-181">\mathbf{b}_o</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-182-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-182">\mathbf{b}_c</script>；另一类是保存各种中间计算结果，以便于反向传播算法使用，它们包括<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-183-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-183">\mathbf{h}_t</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-184-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-184">\mathbf{f}_t</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-185-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-185">\mathbf{i}_t</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-186-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-186">\mathbf{o}_t</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-187-Frame" role="textbox" aria-readonly="true" 
style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-187">\mathbf{c}_t</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-188-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-188">\mathbf{\tilde{c}}_t</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-189-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-189">\delta_t</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-190-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-190">\delta_{f,t}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-191-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-191">\delta_{i,t}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-192-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-192">\delta_{o,t}</script>、<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-193-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-193">\delta_{\tilde{c},t}</script>，以及各个权重对应的梯度。</p><p data-anchor-id="0wy4">在构造函数的初始化中，只初始化了与forward计算相关的变量，与backward相关的变量没有初始化。这是因为构造LSTM对象的时候，我们还不知道它未来是用于训练（既有forward又有backward）还是推理（只有forward）。</p><div 
class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="rice" style=""><ol class="linenums"><li class="L0"><code class="language-python"><span class="kwd">class</span><span class="pln"> </span><span class="typ">LstmLayer</span><span class="pun">(</span><span class="pln">object</span><span class="pun">):</span></code></li><li class="L1"><code class="language-python"><span class="pln">    </span><span class="kwd">def</span><span class="pln"> __init__</span><span class="pun">(</span><span class="pln">self</span><span class="pun">,</span><span class="pln"> input_width</span><span class="pun">,</span><span class="pln"> state_width</span><span class="pun">,</span><span class="pln"> </span></code></li><li class="L2"><code class="language-python"><span class="pln">                 learning_rate</span><span class="pun">):</span></code></li><li class="L3"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">input_width </span><span class="pun">=</span><span class="pln"> input_width</span></code></li><li class="L4"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">state_width </span><span class="pun">=</span><span class="pln"> state_width</span></code></li><li class="L5"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">learning_rate </span><span class="pun">=</span><span class="pln"> learning_rate</span></code></li><li class="L6"><code class="language-python"><span class="pln">        </span><span class="com"># 门的激活函数</span></code></li><li class="L7"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">gate_activator </span><span class="pun">=</span><span class="pln"> </span><span class="typ">SigmoidActivator</span><span class="pun">()</span></code></li><li class="L8"><code 
class="language-python"><span class="pln">        </span><span class="com"># 输出的激活函数</span></code></li><li class="L9"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">output_activator </span><span class="pun">=</span><span class="pln"> </span><span class="typ">TanhActivator</span><span class="pun">()</span></code></li><li class="L0"><code class="language-python"><span class="pln">        </span><span class="com"># 当前时刻初始化为t0</span></code></li><li class="L1"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">times </span><span class="pun">=</span><span class="pln"> </span><span class="lit">0</span><span class="pln">       </span></code></li><li class="L2"><code class="language-python"><span class="pln">        </span><span class="com"># 各个时刻的单元状态向量c</span></code></li><li class="L3"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">c_list </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">init_state_vec</span><span class="pun">()</span></code></li><li class="L4"><code class="language-python"><span class="pln">        </span><span class="com"># 各个时刻的输出向量h</span></code></li><li class="L5"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">h_list </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">init_state_vec</span><span class="pun">()</span></code></li><li class="L6"><code class="language-python"><span class="pln">        </span><span class="com"># 各个时刻的遗忘门f</span></code></li><li class="L7"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">f_list </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span 
class="pln">init_state_vec</span><span class="pun">()</span></code></li><li class="L8"><code class="language-python"><span class="pln">        </span><span class="com"># 各个时刻的输入门i</span></code></li><li class="L9"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">i_list </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">init_state_vec</span><span class="pun">()</span></code></li><li class="L0"><code class="language-python"><span class="pln">        </span><span class="com"># 各个时刻的输出门o</span></code></li><li class="L1"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">o_list </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">init_state_vec</span><span class="pun">()</span></code></li><li class="L2"><code class="language-python"><span class="pln">        </span><span class="com"># 各个时刻的即时状态c~</span></code></li><li class="L3"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">ct_list </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">init_state_vec</span><span class="pun">()</span></code></li><li class="L4"><code class="language-python"><span class="pln">        </span><span class="com"># 遗忘门权重矩阵Wfh, Wfx, 偏置项bf</span></code></li><li class="L5"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="typ">Wfh</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wfx</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">bf </span><span class="pun">=</span><span class="pln"> </span><span class="pun">(</span></code></li><li class="L6"><code class="language-python"><span 
class="pln">            self</span><span class="pun">.</span><span class="pln">init_weight_mat</span><span class="pun">())</span></code></li><li class="L7"><code class="language-python"><span class="pln">        </span><span class="com"># 输入门权重矩阵Wih, Wix, 偏置项bi</span></code></li><li class="L8"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="typ">Wih</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wix</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">bi </span><span class="pun">=</span><span class="pln"> </span><span class="pun">(</span></code></li><li class="L9"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="pln">init_weight_mat</span><span class="pun">())</span></code></li><li class="L0"><code class="language-python"><span class="pln">        </span><span class="com"># 输出门权重矩阵Woh, Wox, 偏置项bo</span></code></li><li class="L1"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="typ">Woh</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wox</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">bo </span><span class="pun">=</span><span class="pln"> </span><span class="pun">(</span></code></li><li class="L2"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="pln">init_weight_mat</span><span class="pun">())</span></code></li><li class="L3"><code class="language-python"><span class="pln">        </span><span class="com"># 单元状态权重矩阵Wch, Wcx, 偏置项bc</span></code></li><li class="L4"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="typ">Wch</span><span
class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wcx</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">bc </span><span class="pun">=</span><span class="pln"> </span><span class="pun">(</span></code></li><li class="L5"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="pln">init_weight_mat</span><span class="pun">())</span></code></li><li class="L6"><code class="language-python"></code></li><li class="L7"><code class="language-python"><span class="pln">    </span><span class="kwd">def</span><span class="pln"> init_state_vec</span><span class="pun">(</span><span class="pln">self</span><span class="pun">):</span></code></li><li class="L8"><code class="language-python"><span class="pln">        </span><span class="str">'''</span></code></li><li class="L9"><code class="language-python"><span class="str">        初始化保存状态的向量</span></code></li><li class="L0"><code class="language-python"><span class="str">        '''</span></code></li><li class="L1"><code class="language-python"><span class="pln">        state_vec_list </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[]</span></code></li><li class="L2"><code class="language-python"><span class="pln">        state_vec_list</span><span class="pun">.</span><span class="pln">append</span><span class="pun">(</span><span class="pln">np</span><span class="pun">.</span><span class="pln">zeros</span><span class="pun">(</span></code></li><li class="L3"><code class="language-python"><span class="pln">            </span><span class="pun">(</span><span class="pln">self</span><span class="pun">.</span><span class="pln">state_width</span><span class="pun">,</span><span class="pln"> </span><span class="lit">1</span><span class="pun">)))</span></code></li><li class="L4"><code class="language-python"><span class="pln">        </span><span 
class="kwd">return</span><span class="pln"> state_vec_list</span></code></li><li class="L5"><code class="language-python"></code></li><li class="L6"><code class="language-python"><span class="pln">    </span><span class="kwd">def</span><span class="pln"> init_weight_mat</span><span class="pun">(</span><span class="pln">self</span><span class="pun">):</span></code></li><li class="L7"><code class="language-python"><span class="pln">        </span><span class="str">'''</span></code></li><li class="L8"><code class="language-python"><span class="str">        初始化权重矩阵</span></code></li><li class="L9"><code class="language-python"><span class="str">        '''</span></code></li><li class="L0"><code class="language-python"><span class="pln">        </span><span class="typ">Wh</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> np</span><span class="pun">.</span><span class="pln">random</span><span class="pun">.</span><span class="pln">uniform</span><span class="pun">(-</span><span class="lit">1e-4</span><span class="pun">,</span><span class="pln"> </span><span class="lit">1e-4</span><span class="pun">,</span></code></li><li class="L1"><code class="language-python"><span class="pln">            </span><span class="pun">(</span><span class="pln">self</span><span class="pun">.</span><span class="pln">state_width</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">state_width</span><span class="pun">))</span></code></li><li class="L2"><code class="language-python"><span class="pln">        </span><span class="typ">Wx</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> np</span><span class="pun">.</span><span class="pln">random</span><span class="pun">.</span><span class="pln">uniform</span><span class="pun">(-</span><span class="lit">1e-4</span><span class="pun">,</span><span class="pln"> </span><span class="lit">1e-4</span><span class="pun">,</span></code></li><li 
class="L3"><code class="language-python"><span class="pln">            </span><span class="pun">(</span><span class="pln">self</span><span class="pun">.</span><span class="pln">state_width</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">input_width</span><span class="pun">))</span></code></li><li class="L4"><code class="language-python"><span class="pln">        b </span><span class="pun">=</span><span class="pln"> np</span><span class="pun">.</span><span class="pln">zeros</span><span class="pun">((</span><span class="pln">self</span><span class="pun">.</span><span class="pln">state_width</span><span class="pun">,</span><span class="pln"> </span><span class="lit">1</span><span class="pun">))</span></code></li><li class="L5"><code class="language-python"><span class="pln">        </span><span class="kwd">return</span><span class="pln"> </span><span class="typ">Wh</span><span class="pun">,</span><span class="pln"> </span><span class="typ">Wx</span><span class="pun">,</span><span class="pln"> b</span></code></li></ol></pre><div class="md-section-divider"></div><h3 data-anchor-id="zpb8" id="前向计算的实现">前向计算的实现</h3><p data-anchor-id="dnml">forward方法实现了LSTM的前向计算：</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="jygv" style=""><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">    </span><span class="kwd">def</span><span class="pln"> forward</span><span class="pun">(</span><span class="pln">self</span><span class="pun">,</span><span class="pln"> x</span><span class="pun">):</span></code></li><li class="L1"><code class="language-python"><span class="pln">        </span><span class="str">'''</span></code></li><li class="L2"><code class="language-python"><span class="str">        根据式1-式6进行前向计算</span></code></li><li class="L3"><code class="language-python"><span class="str">        '''</span></code></li><li class="L4"><code 
class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">times </span><span class="pun">+=</span><span class="pln"> </span><span class="lit">1</span></code></li><li class="L5"><code class="language-python"><span class="pln">        </span><span class="com"># 遗忘门</span></code></li><li class="L6"><code class="language-python"><span class="pln">        fg </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">calc_gate</span><span class="pun">(</span><span class="pln">x</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wfx</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wfh</span><span class="pun">,</span><span class="pln"> </span></code></li><li class="L7"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="pln">bf</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">gate_activator</span><span class="pun">)</span></code></li><li class="L8"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">f_list</span><span class="pun">.</span><span class="pln">append</span><span class="pun">(</span><span class="pln">fg</span><span class="pun">)</span></code></li><li class="L9"><code class="language-python"><span class="pln">        </span><span class="com"># 输入门</span></code></li><li class="L0"><code class="language-python"><span class="pln">        ig </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">calc_gate</span><span class="pun">(</span><span class="pln">x</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wix</span><span class="pun">,</span><span class="pln"> self</span><span 
class="pun">.</span><span class="typ">Wih</span><span class="pun">,</span></code></li><li class="L1"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="pln">bi</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">gate_activator</span><span class="pun">)</span></code></li><li class="L2"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">i_list</span><span class="pun">.</span><span class="pln">append</span><span class="pun">(</span><span class="pln">ig</span><span class="pun">)</span></code></li><li class="L3"><code class="language-python"><span class="pln">        </span><span class="com"># 输出门</span></code></li><li class="L4"><code class="language-python"><span class="pln">        og </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">calc_gate</span><span class="pun">(</span><span class="pln">x</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wox</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Woh</span><span class="pun">,</span></code></li><li class="L5"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="pln">bo</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">gate_activator</span><span class="pun">)</span></code></li><li class="L6"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">o_list</span><span class="pun">.</span><span class="pln">append</span><span class="pun">(</span><span class="pln">og</span><span class="pun">)</span></code></li><li class="L7"><code class="language-python"><span class="pln">        </span><span class="com"># 
即时状态</span></code></li><li class="L8"><code class="language-python"><span class="pln">        ct </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">calc_gate</span><span class="pun">(</span><span class="pln">x</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wcx</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wch</span><span class="pun">,</span></code></li><li class="L9"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="pln">bc</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">output_activator</span><span class="pun">)</span></code></li><li class="L0"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">ct_list</span><span class="pun">.</span><span class="pln">append</span><span class="pun">(</span><span class="pln">ct</span><span class="pun">)</span></code></li><li class="L1"><code class="language-python"><span class="pln">        </span><span class="com"># 单元状态</span></code></li><li class="L2"><code class="language-python"><span class="pln">        c </span><span class="pun">=</span><span class="pln"> fg </span><span class="pun">*</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">c_list</span><span class="pun">[</span><span class="pln">self</span><span class="pun">.</span><span class="pln">times </span><span class="pun">-</span><span class="pln"> </span><span class="lit">1</span><span class="pun">]</span><span class="pln"> </span><span class="pun">+</span><span class="pln"> ig </span><span class="pun">*</span><span class="pln"> ct</span></code></li><li class="L3"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">c_list</span><span 
class="pun">.</span><span class="pln">append</span><span class="pun">(</span><span class="pln">c</span><span class="pun">)</span></code></li><li class="L4"><code class="language-python"><span class="pln">        </span><span class="com"># 输出</span></code></li><li class="L5"><code class="language-python"><span class="pln">        h </span><span class="pun">=</span><span class="pln"> og </span><span class="pun">*</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">output_activator</span><span class="pun">.</span><span class="pln">forward</span><span class="pun">(</span><span class="pln">c</span><span class="pun">)</span></code></li><li class="L6"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">h_list</span><span class="pun">.</span><span class="pln">append</span><span class="pun">(</span><span class="pln">h</span><span class="pun">)</span></code></li><li class="L7"><code class="language-python"></code></li><li class="L8"><code class="language-python"><span class="pln">    </span><span class="kwd">def</span><span class="pln"> calc_gate</span><span class="pun">(</span><span class="pln">self</span><span class="pun">,</span><span class="pln"> x</span><span class="pun">,</span><span class="pln"> </span><span class="typ">Wx</span><span class="pun">,</span><span class="pln"> </span><span class="typ">Wh</span><span class="pun">,</span><span class="pln"> b</span><span class="pun">,</span><span class="pln"> activator</span><span class="pun">):</span></code></li><li class="L9"><code class="language-python"><span class="pln">        </span><span class="str">'''</span></code></li><li class="L0"><code class="language-python"><span class="str">        计算门</span></code></li><li class="L1"><code class="language-python"><span class="str">        '''</span></code></li><li class="L2"><code class="language-python"><span class="pln">        h </span><span class="pun">=</span><span class="pln"> 
self</span><span class="pun">.</span><span class="pln">h_list</span><span class="pun">[</span><span class="pln">self</span><span class="pun">.</span><span class="pln">times </span><span class="pun">-</span><span class="pln"> </span><span class="lit">1</span><span class="pun">]</span><span class="pln"> </span><span class="com"># 上次的LSTM输出</span></code></li><li class="L3"><code class="language-python"><span class="pln">        net </span><span class="pun">=</span><span class="pln"> np</span><span class="pun">.</span><span class="pln">dot</span><span class="pun">(</span><span class="typ">Wh</span><span class="pun">,</span><span class="pln"> h</span><span class="pun">)</span><span class="pln"> </span><span class="pun">+</span><span class="pln"> np</span><span class="pun">.</span><span class="pln">dot</span><span class="pun">(</span><span class="typ">Wx</span><span class="pun">,</span><span class="pln"> x</span><span class="pun">)</span><span class="pln"> </span><span class="pun">+</span><span class="pln"> b</span></code></li><li class="L4"><code class="language-python"><span class="pln">        gate </span><span class="pun">=</span><span class="pln"> activator</span><span class="pun">.</span><span class="pln">forward</span><span class="pun">(</span><span class="pln">net</span><span class="pun">)</span></code></li><li class="L5"><code class="language-python"><span class="pln">        </span><span class="kwd">return</span><span class="pln"> gate</span></code></li></ol></pre><p data-anchor-id="0aa3">从上面的代码我们可以看到，门的计算都是相同的算法，而门和<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-194-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-194">\mathbf{\tilde{c}_t}</script>的计算仅仅是激活函数不同。因此我们提出了calc_gate方法，这样减少了很多重复代码。</p><div class="md-section-divider"></div><h3 data-anchor-id="5y0e" id="反向传播算法的实现">反向传播算法的实现</h3><p 
data-anchor-id="se98">backward方法实现了LSTM的反向传播算法。需要注意的是，与backward相关的内部状态变量是在调用backward方法之后才初始化的。这种延迟初始化的一个好处是，如果LSTM只是用来推理，那么就不需要初始化这些变量，节省了很多内存。</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="j9g4" style=""><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">    </span><span class="kwd">def</span><span class="pln"> backward</span><span class="pun">(</span><span class="pln">self</span><span class="pun">,</span><span class="pln"> x</span><span class="pun">,</span><span class="pln"> delta_h</span><span class="pun">,</span><span class="pln"> activator</span><span class="pun">):</span></code></li><li class="L1"><code class="language-python"><span class="pln">        </span><span class="str">'''</span></code></li><li class="L2"><code class="language-python"><span class="str">        实现LSTM训练算法</span></code></li><li class="L3"><code class="language-python"><span class="str">        '''</span></code></li><li class="L4"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">calc_delta</span><span class="pun">(</span><span class="pln">delta_h</span><span class="pun">,</span><span class="pln"> activator</span><span class="pun">)</span></code></li><li class="L5"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">calc_gradient</span><span class="pun">(</span><span class="pln">x</span><span class="pun">)</span></code></li></ol></pre><p data-anchor-id="t2nc">算法主要分成两个部分，一部分是计算误差项：</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="2p3o" style=""><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">    </span><span class="kwd">def</span><span class="pln"> calc_delta</span><span class="pun">(</span><span class="pln">self</span><span class="pun">,</span><span class="pln"> delta_h</span><span
class="pun">,</span><span class="pln"> activator</span><span class="pun">):</span></code></li><li class="L1"><code class="language-python"><span class="pln">        </span><span class="com"># 初始化各个时刻的误差项</span></code></li><li class="L2"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">delta_h_list </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">init_delta</span><span class="pun">()</span><span class="pln">  </span><span class="com"># 输出误差项</span></code></li><li class="L3"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">delta_o_list </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">init_delta</span><span class="pun">()</span><span class="pln">  </span><span class="com"># 输出门误差项</span></code></li><li class="L4"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">delta_i_list </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">init_delta</span><span class="pun">()</span><span class="pln">  </span><span class="com"># 输入门误差项</span></code></li><li class="L5"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">delta_f_list </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">init_delta</span><span class="pun">()</span><span class="pln">  </span><span class="com"># 遗忘门误差项</span></code></li><li class="L6"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">delta_ct_list </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">init_delta</span><span class="pun">()</span><span class="pln"> </span><span 
class="com"># 即时输出误差项</span></code></li><li class="L7"><code class="language-python"></code></li><li class="L8"><code class="language-python"><span class="pln">        </span><span class="com"># 保存从上一层传递下来的当前时刻的误差项</span></code></li><li class="L9"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">delta_h_list</span><span class="pun">[-</span><span class="lit">1</span><span class="pun">]</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> delta_h</span></code></li><li class="L0"><code class="language-python"></code></li><li class="L1"><code class="language-python"><span class="pln">        </span><span class="com"># 迭代计算每个时刻的误差项</span></code></li><li class="L2"><code class="language-python"><span class="pln">        </span><span class="kwd">for</span><span class="pln"> k </span><span class="kwd">in</span><span class="pln"> range</span><span class="pun">(</span><span class="pln">self</span><span class="pun">.</span><span class="pln">times</span><span class="pun">,</span><span class="pln"> </span><span class="lit">0</span><span class="pun">,</span><span class="pln"> </span><span class="pun">-</span><span class="lit">1</span><span class="pun">):</span></code></li><li class="L3"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="pln">calc_delta_k</span><span class="pun">(</span><span class="pln">k</span><span class="pun">)</span></code></li><li class="L4"><code class="language-python"></code></li><li class="L5"><code class="language-python"><span class="pln">    </span><span class="kwd">def</span><span class="pln"> init_delta</span><span class="pun">(</span><span class="pln">self</span><span class="pun">):</span></code></li><li class="L6"><code class="language-python"><span class="pln">        </span><span class="str">'''</span></code></li><li class="L7"><code class="language-python"><span class="str">        
初始化误差项</span></code></li><li class="L8"><code class="language-python"><span class="str">        '''</span></code></li><li class="L9"><code class="language-python"><span class="pln">        delta_list </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[]</span></code></li><li class="L0"><code class="language-python"><span class="pln">        </span><span class="kwd">for</span><span class="pln"> i </span><span class="kwd">in</span><span class="pln"> range</span><span class="pun">(</span><span class="pln">self</span><span class="pun">.</span><span class="pln">times </span><span class="pun">+</span><span class="pln"> </span><span class="lit">1</span><span class="pun">):</span></code></li><li class="L1"><code class="language-python"><span class="pln">            delta_list</span><span class="pun">.</span><span class="pln">append</span><span class="pun">(</span><span class="pln">np</span><span class="pun">.</span><span class="pln">zeros</span><span class="pun">(</span></code></li><li class="L2"><code class="language-python"><span class="pln">                </span><span class="pun">(</span><span class="pln">self</span><span class="pun">.</span><span class="pln">state_width</span><span class="pun">,</span><span class="pln"> </span><span class="lit">1</span><span class="pun">)))</span></code></li><li class="L3"><code class="language-python"><span class="pln">        </span><span class="kwd">return</span><span class="pln"> delta_list</span></code></li><li class="L4"><code class="language-python"></code></li><li class="L5"><code class="language-python"><span class="pln">    </span><span class="kwd">def</span><span class="pln"> calc_delta_k</span><span class="pun">(</span><span class="pln">self</span><span class="pun">,</span><span class="pln"> k</span><span class="pun">):</span></code></li><li class="L6"><code class="language-python"><span class="pln">        </span><span class="str">'''</span></code></li><li class="L7"><code 
class="language-python"><span class="str">        根据k时刻的delta_h，计算k时刻的delta_f、</span></code></li><li class="L8"><code class="language-python"><span class="str">        delta_i、delta_o、delta_ct，以及k-1时刻的delta_h</span></code></li><li class="L9"><code class="language-python"><span class="str">        '''</span></code></li><li class="L0"><code class="language-python"><span class="pln">        </span><span class="com"># 获得k时刻前向计算的值</span></code></li><li class="L1"><code class="language-python"><span class="pln">        ig </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">i_list</span><span class="pun">[</span><span class="pln">k</span><span class="pun">]</span></code></li><li class="L2"><code class="language-python"><span class="pln">        og </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">o_list</span><span class="pun">[</span><span class="pln">k</span><span class="pun">]</span></code></li><li class="L3"><code class="language-python"><span class="pln">        fg </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">f_list</span><span class="pun">[</span><span class="pln">k</span><span class="pun">]</span></code></li><li class="L4"><code class="language-python"><span class="pln">        ct </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">ct_list</span><span class="pun">[</span><span class="pln">k</span><span class="pun">]</span></code></li><li class="L5"><code class="language-python"><span class="pln">        c </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">c_list</span><span class="pun">[</span><span class="pln">k</span><span class="pun">]</span></code></li><li class="L6"><code class="language-python"><span class="pln">        c_prev </span><span class="pun">=</span><span class="pln"> 
self</span><span class="pun">.</span><span class="pln">c_list</span><span class="pun">[</span><span class="pln">k</span><span class="pun">-</span><span class="lit">1</span><span class="pun">]</span></code></li><li class="L7"><code class="language-python"><span class="pln">        tanh_c </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">output_activator</span><span class="pun">.</span><span class="pln">forward</span><span class="pun">(</span><span class="pln">c</span><span class="pun">)</span></code></li><li class="L8"><code class="language-python"><span class="pln">        delta_k </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">delta_h_list</span><span class="pun">[</span><span class="pln">k</span><span class="pun">]</span></code></li><li class="L9"><code class="language-python"></code></li><li class="L0"><code class="language-python"><span class="pln">        </span><span class="com"># 根据式9计算delta_o</span></code></li><li class="L1"><code class="language-python"><span class="pln">        delta_o </span><span class="pun">=</span><span class="pln"> </span><span class="pun">(</span><span class="pln">delta_k </span><span class="pun">*</span><span class="pln"> tanh_c </span><span class="pun">*</span><span class="pln"> </span></code></li><li class="L2"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="pln">gate_activator</span><span class="pun">.</span><span class="pln">backward</span><span class="pun">(</span><span class="pln">og</span><span class="pun">))</span></code></li><li class="L3"><code class="language-python"><span class="pln">        delta_f </span><span class="pun">=</span><span class="pln"> </span><span class="pun">(</span><span class="pln">delta_k </span><span class="pun">*</span><span class="pln"> og </span><span class="pun">*</span><span class="pln"> </span></code></li><li 
class="L4"><code class="language-python"><span class="pln">            </span><span class="pun">(</span><span class="lit">1</span><span class="pln"> </span><span class="pun">-</span><span class="pln"> tanh_c </span><span class="pun">*</span><span class="pln"> tanh_c</span><span class="pun">)</span><span class="pln"> </span><span class="pun">*</span><span class="pln"> c_prev </span><span class="pun">*</span></code></li><li class="L5"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="pln">gate_activator</span><span class="pun">.</span><span class="pln">backward</span><span class="pun">(</span><span class="pln">fg</span><span class="pun">))</span></code></li><li class="L6"><code class="language-python"><span class="pln">        delta_i </span><span class="pun">=</span><span class="pln"> </span><span class="pun">(</span><span class="pln">delta_k </span><span class="pun">*</span><span class="pln"> og </span><span class="pun">*</span><span class="pln"> </span></code></li><li class="L7"><code class="language-python"><span class="pln">            </span><span class="pun">(</span><span class="lit">1</span><span class="pln"> </span><span class="pun">-</span><span class="pln"> tanh_c </span><span class="pun">*</span><span class="pln"> tanh_c</span><span class="pun">)</span><span class="pln"> </span><span class="pun">*</span><span class="pln"> ct </span><span class="pun">*</span></code></li><li class="L8"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="pln">gate_activator</span><span class="pun">.</span><span class="pln">backward</span><span class="pun">(</span><span class="pln">ig</span><span class="pun">))</span></code></li><li class="L9"><code class="language-python"><span class="pln">        delta_ct </span><span class="pun">=</span><span class="pln"> </span><span class="pun">(</span><span class="pln">delta_k </span><span class="pun">*</span><span 
class="pln"> og </span><span class="pun">*</span><span class="pln"> </span></code></li><li class="L0"><code class="language-python"><span class="pln">            </span><span class="pun">(</span><span class="lit">1</span><span class="pln"> </span><span class="pun">-</span><span class="pln"> tanh_c </span><span class="pun">*</span><span class="pln"> tanh_c</span><span class="pun">)</span><span class="pln"> </span><span class="pun">*</span><span class="pln"> ig </span><span class="pun">*</span></code></li><li class="L1"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="pln">output_activator</span><span class="pun">.</span><span class="pln">backward</span><span class="pun">(</span><span class="pln">ct</span><span class="pun">))</span></code></li><li class="L2"><code class="language-python"><span class="pln">        delta_h_prev </span><span class="pun">=</span><span class="pln"> </span><span class="pun">(</span></code></li><li class="L3"><code class="language-python"><span class="pln">                np</span><span class="pun">.</span><span class="pln">dot</span><span class="pun">(</span><span class="pln">delta_o</span><span class="pun">.</span><span class="pln">transpose</span><span class="pun">(),</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Woh</span><span class="pun">)</span><span class="pln"> </span><span class="pun">+</span></code></li><li class="L4"><code class="language-python"><span class="pln">                np</span><span class="pun">.</span><span class="pln">dot</span><span class="pun">(</span><span class="pln">delta_i</span><span class="pun">.</span><span class="pln">transpose</span><span class="pun">(),</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wih</span><span class="pun">)</span><span class="pln"> </span><span class="pun">+</span></code></li><li class="L5"><code class="language-python"><span class="pln">                
np</span><span class="pun">.</span><span class="pln">dot</span><span class="pun">(</span><span class="pln">delta_f</span><span class="pun">.</span><span class="pln">transpose</span><span class="pun">(),</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wfh</span><span class="pun">)</span><span class="pln"> </span><span class="pun">+</span></code></li><li class="L6"><code class="language-python"><span class="pln">                np</span><span class="pun">.</span><span class="pln">dot</span><span class="pun">(</span><span class="pln">delta_ct</span><span class="pun">.</span><span class="pln">transpose</span><span class="pun">(),</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wch</span><span class="pun">)</span></code></li><li class="L7"><code class="language-python"><span class="pln">            </span><span class="pun">).</span><span class="pln">transpose</span><span class="pun">()</span></code></li><li class="L8"><code class="language-python"></code></li><li class="L9"><code class="language-python"><span class="pln">        </span><span class="com"># 保存全部delta值</span></code></li><li class="L0"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">delta_h_list</span><span class="pun">[</span><span class="pln">k</span><span class="pun">-</span><span class="lit">1</span><span class="pun">]</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> delta_h_prev</span></code></li><li class="L1"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">delta_f_list</span><span class="pun">[</span><span class="pln">k</span><span class="pun">]</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> delta_f</span></code></li><li class="L2"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span 
class="pln">delta_i_list</span><span class="pun">[</span><span class="pln">k</span><span class="pun">]</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> delta_i</span></code></li><li class="L3"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">delta_o_list</span><span class="pun">[</span><span class="pln">k</span><span class="pun">]</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> delta_o</span></code></li><li class="L4"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">delta_ct_list</span><span class="pun">[</span><span class="pln">k</span><span class="pun">]</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> delta_ct</span></code></li></ol></pre><p data-anchor-id="41gn">另一部分是计算梯度：</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="rzpm" style=""><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">    </span><span class="kwd">def</span><span class="pln"> calc_gradient</span><span class="pun">(</span><span class="pln">self</span><span class="pun">,</span><span class="pln"> x</span><span class="pun">):</span></code></li><li class="L1"><code class="language-python"><span class="pln">        </span><span class="com"># 初始化遗忘门权重梯度矩阵和偏置项</span></code></li><li class="L2"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="typ">Wfh_grad</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wfx_grad</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">bf_grad </span><span class="pun">=</span><span class="pln"> </span><span class="pun">(</span></code></li><li class="L3"><code class="language-python"><span class="pln">            
self</span><span class="pun">.</span><span class="pln">init_weight_gradient_mat</span><span class="pun">())</span></code></li><li class="L4"><code class="language-python"><span class="pln">        </span><span class="com"># 初始化输入门权重梯度矩阵和偏置项</span></code></li><li class="L5"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="typ">Wih_grad</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wix_grad</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">bi_grad </span><span class="pun">=</span><span class="pln"> </span><span class="pun">(</span></code></li><li class="L6"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="pln">init_weight_gradient_mat</span><span class="pun">())</span></code></li><li class="L7"><code class="language-python"><span class="pln">        </span><span class="com"># 初始化输出门权重梯度矩阵和偏置项</span></code></li><li class="L8"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="typ">Woh_grad</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wox_grad</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">bo_grad </span><span class="pun">=</span><span class="pln"> </span><span class="pun">(</span></code></li><li class="L9"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="pln">init_weight_gradient_mat</span><span class="pun">())</span></code></li><li class="L0"><code class="language-python"><span class="pln">        </span><span class="com"># 初始化单元状态权重梯度矩阵和偏置项</span></code></li><li class="L1"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="typ">Wch_grad</span><span 
class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wcx_grad</span><span class="pun">,</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">bc_grad </span><span class="pun">=</span><span class="pln"> </span><span class="pun">(</span></code></li><li class="L2"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="pln">init_weight_gradient_mat</span><span class="pun">())</span></code></li><li class="L3"><code class="language-python"></code></li><li class="L4"><code class="language-python"><span class="pln">       </span><span class="com"># 计算对上一次输出h的权重梯度</span></code></li><li class="L5"><code class="language-python"><span class="pln">        </span><span class="kwd">for</span><span class="pln"> t </span><span class="kwd">in</span><span class="pln"> range</span><span class="pun">(</span><span class="pln">self</span><span class="pun">.</span><span class="pln">times</span><span class="pun">,</span><span class="pln"> </span><span class="lit">0</span><span class="pun">,</span><span class="pln"> </span><span class="pun">-</span><span class="lit">1</span><span class="pun">):</span></code></li><li class="L6"><code class="language-python"><span class="pln">            </span><span class="com"># 计算各个时刻的梯度</span></code></li><li class="L7"><code class="language-python"><span class="pln">            </span><span class="pun">(</span><span class="typ">Wfh_grad</span><span class="pun">,</span><span class="pln"> bf_grad</span><span class="pun">,</span></code></li><li class="L8"><code class="language-python"><span class="pln">            </span><span class="typ">Wih_grad</span><span class="pun">,</span><span class="pln"> bi_grad</span><span class="pun">,</span></code></li><li class="L9"><code class="language-python"><span class="pln">            </span><span class="typ">Woh_grad</span><span class="pun">,</span><span class="pln"> bo_grad</span><span 
class="pun">,</span></code></li><li class="L0"><code class="language-python"><span class="pln">            </span><span class="typ">Wch_grad</span><span class="pun">,</span><span class="pln"> bc_grad</span><span class="pun">)</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> </span><span class="pun">(</span></code></li><li class="L1"><code class="language-python"><span class="pln">                self</span><span class="pun">.</span><span class="typ">calc_gradient_t</span><span class="pun">(</span><span class="pln">t</span><span class="pun">))</span></code></li><li class="L2"><code class="language-python"><span class="pln">            </span><span class="com"># 实际梯度是各时刻梯度之和</span></code></li><li class="L3"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="typ">Wfh_grad</span><span class="pln"> </span><span class="pun">+=</span><span class="pln"> </span><span class="typ">Wfh_grad</span></code></li><li class="L4"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="pln">bf_grad </span><span class="pun">+=</span><span class="pln"> bf_grad</span></code></li><li class="L5"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="typ">Wih_grad</span><span class="pln"> </span><span class="pun">+=</span><span class="pln"> </span><span class="typ">Wih_grad</span></code></li><li class="L6"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="pln">bi_grad </span><span class="pun">+=</span><span class="pln"> bi_grad</span></code></li><li class="L7"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="typ">Woh_grad</span><span class="pln"> </span><span class="pun">+=</span><span class="pln"> </span><span class="typ">Woh_grad</span></code></li><li class="L8"><code 
class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="pln">bo_grad </span><span class="pun">+=</span><span class="pln"> bo_grad</span></code></li><li class="L9"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="typ">Wch_grad</span><span class="pln"> </span><span class="pun">+=</span><span class="pln"> </span><span class="typ">Wch_grad</span></code></li><li class="L0"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="pln">bc_grad </span><span class="pun">+=</span><span class="pln"> bc_grad</span></code></li><li class="L1"><code class="language-python"><span class="pln">            </span><span class="kwd">print</span><span class="pln"> </span><span class="str">'-----%d-----'</span><span class="pln"> </span><span class="pun">%</span><span class="pln"> t</span></code></li><li class="L2"><code class="language-python"><span class="pln">            </span><span class="kwd">print</span><span class="pln"> </span><span class="typ">Wfh_grad</span></code></li><li class="L3"><code class="language-python"><span class="pln">            </span><span class="kwd">print</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wfh_grad</span></code></li><li class="L4"><code class="language-python"></code></li><li class="L5"><code class="language-python"><span class="pln">        </span><span class="com"># 计算对本次输入x的权重梯度</span></code></li><li class="L6"><code class="language-python"><span class="pln">        xt </span><span class="pun">=</span><span class="pln"> x</span><span class="pun">.</span><span class="pln">transpose</span><span class="pun">()</span></code></li><li class="L7"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="typ">Wfx_grad</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> np</span><span 
class="pun">.</span><span class="pln">dot</span><span class="pun">(</span><span class="pln">self</span><span class="pun">.</span><span class="pln">delta_f_list</span><span class="pun">[-</span><span class="lit">1</span><span class="pun">],</span><span class="pln"> xt</span><span class="pun">)</span></code></li><li class="L8"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="typ">Wix_grad</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> np</span><span class="pun">.</span><span class="pln">dot</span><span class="pun">(</span><span class="pln">self</span><span class="pun">.</span><span class="pln">delta_i_list</span><span class="pun">[-</span><span class="lit">1</span><span class="pun">],</span><span class="pln"> xt</span><span class="pun">)</span></code></li><li class="L9"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="typ">Wox_grad</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> np</span><span class="pun">.</span><span class="pln">dot</span><span class="pun">(</span><span class="pln">self</span><span class="pun">.</span><span class="pln">delta_o_list</span><span class="pun">[-</span><span class="lit">1</span><span class="pun">],</span><span class="pln"> xt</span><span class="pun">)</span></code></li><li class="L0"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="typ">Wcx_grad</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> np</span><span class="pun">.</span><span class="pln">dot</span><span class="pun">(</span><span class="pln">self</span><span class="pun">.</span><span class="pln">delta_ct_list</span><span class="pun">[-</span><span class="lit">1</span><span class="pun">],</span><span class="pln"> xt</span><span class="pun">)</span></code></li><li class="L1"><code class="language-python"></code></li><li 
class="L2"><code class="language-python"><span class="pln">    </span><span class="kwd">def</span><span class="pln"> init_weight_gradient_mat</span><span class="pun">(</span><span class="pln">self</span><span class="pun">):</span></code></li><li class="L3"><code class="language-python"><span class="pln">        </span><span class="str">'''</span></code></li><li class="L4"><code class="language-python"><span class="str">        初始化权重矩阵</span></code></li><li class="L5"><code class="language-python"><span class="str">        '''</span></code></li><li class="L6"><code class="language-python"><span class="pln">        </span><span class="typ">Wh_grad</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> np</span><span class="pun">.</span><span class="pln">zeros</span><span class="pun">((</span><span class="pln">self</span><span class="pun">.</span><span class="pln">state_width</span><span class="pun">,</span></code></li><li class="L7"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="pln">state_width</span><span class="pun">))</span></code></li><li class="L8"><code class="language-python"><span class="pln">        </span><span class="typ">Wx_grad</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> np</span><span class="pun">.</span><span class="pln">zeros</span><span class="pun">((</span><span class="pln">self</span><span class="pun">.</span><span class="pln">state_width</span><span class="pun">,</span></code></li><li class="L9"><code class="language-python"><span class="pln">            self</span><span class="pun">.</span><span class="pln">input_width</span><span class="pun">))</span></code></li><li class="L0"><code class="language-python"><span class="pln">        b_grad </span><span class="pun">=</span><span class="pln"> np</span><span class="pun">.</span><span class="pln">zeros</span><span class="pun">((</span><span class="pln">self</span><span 
class="pun">.</span><span class="pln">state_width</span><span class="pun">,</span><span class="pln"> </span><span class="lit">1</span><span class="pun">))</span></code></li><li class="L1"><code class="language-python"><span class="pln">        </span><span class="kwd">return</span><span class="pln"> </span><span class="typ">Wh_grad</span><span class="pun">,</span><span class="pln"> </span><span class="typ">Wx_grad</span><span class="pun">,</span><span class="pln"> b_grad</span></code></li><li class="L2"><code class="language-python"></code></li><li class="L3"><code class="language-python"><span class="pln">    </span><span class="kwd">def</span><span class="pln"> </span><span class="typ">calc_gradient_t</span><span class="pun">(</span><span class="pln">self</span><span class="pun">,</span><span class="pln"> t</span><span class="pun">):</span></code></li><li class="L4"><code class="language-python"><span class="pln">        </span><span class="str">'''</span></code></li><li class="L5"><code class="language-python"><span class="str">        计算每个时刻t权重的梯度</span></code></li><li class="L6"><code class="language-python"><span class="str">        '''</span></code></li><li class="L7"><code class="language-python"><span class="pln">        h_prev </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">h_list</span><span class="pun">[</span><span class="pln">t</span><span class="pun">-</span><span class="lit">1</span><span class="pun">].</span><span class="pln">transpose</span><span class="pun">()</span></code></li><li class="L8"><code class="language-python"><span class="pln">        </span><span class="typ">Wfh_grad</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> np</span><span class="pun">.</span><span class="pln">dot</span><span class="pun">(</span><span class="pln">self</span><span class="pun">.</span><span class="pln">delta_f_list</span><span class="pun">[</span><span 
class="pln">t</span><span class="pun">],</span><span class="pln"> h_prev</span><span class="pun">)</span></code></li><li class="L9"><code class="language-python"><span class="pln">        bf_grad </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">delta_f_list</span><span class="pun">[</span><span class="pln">t</span><span class="pun">]</span></code></li><li class="L0"><code class="language-python"><span class="pln">        </span><span class="typ">Wih_grad</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> np</span><span class="pun">.</span><span class="pln">dot</span><span class="pun">(</span><span class="pln">self</span><span class="pun">.</span><span class="pln">delta_i_list</span><span class="pun">[</span><span class="pln">t</span><span class="pun">],</span><span class="pln"> h_prev</span><span class="pun">)</span></code></li><li class="L1"><code class="language-python"><span class="pln">        bi_grad </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">delta_i_list</span><span class="pun">[</span><span class="pln">t</span><span class="pun">]</span></code></li><li class="L2"><code class="language-python"><span class="pln">        </span><span class="typ">Woh_grad</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> np</span><span class="pun">.</span><span class="pln">dot</span><span class="pun">(</span><span class="pln">self</span><span class="pun">.</span><span class="pln">delta_o_list</span><span class="pun">[</span><span class="pln">t</span><span class="pun">],</span><span class="pln"> h_prev</span><span class="pun">)</span></code></li><li class="L3"><code class="language-python"><span class="pln">        bo_grad </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">delta_o_list</span><span class="pun">[</span><span class="pln">t</span><span
class="pun">]</span></code></li><li class="L4"><code class="language-python"><span class="pln">        </span><span class="typ">Wch_grad</span><span class="pln"> </span><span class="pun">=</span><span class="pln"> np</span><span class="pun">.</span><span class="pln">dot</span><span class="pun">(</span><span class="pln">self</span><span class="pun">.</span><span class="pln">delta_ct_list</span><span class="pun">[</span><span class="pln">t</span><span class="pun">],</span><span class="pln"> h_prev</span><span class="pun">)</span></code></li><li class="L5"><code class="language-python"><span class="pln">        bc_grad </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">delta_ct_list</span><span class="pun">[</span><span class="pln">t</span><span class="pun">]</span></code></li><li class="L6"><code class="language-python"><span class="pln">        </span><span class="kwd">return</span><span class="pln"> </span><span class="typ">Wfh_grad</span><span class="pun">,</span><span class="pln"> bf_grad</span><span class="pun">,</span><span class="pln"> </span><span class="typ">Wih_grad</span><span class="pun">,</span><span class="pln"> bi_grad</span><span class="pun">,</span><span class="pln"> \</span></code></li><li class="L7"><code class="language-python"><span class="pln">               </span><span class="typ">Woh_grad</span><span class="pun">,</span><span class="pln"> bo_grad</span><span class="pun">,</span><span class="pln"> </span><span class="typ">Wch_grad</span><span class="pun">,</span><span class="pln"> bc_grad</span></code></li></ol></pre><div class="md-section-divider"></div><h3 data-anchor-id="7bft" id="梯度下降算法的实现">梯度下降算法的实现</h3><p data-anchor-id="zwj8">下面是用梯度下降算法来更新权重：</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="ogfr" style=""><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">    </span><span 
class="kwd">def</span><span class="pln"> update</span><span class="pun">(</span><span class="pln">self</span><span class="pun">):</span></code></li><li class="L1"><code class="language-python"><span class="pln">        </span><span class="str">'''</span></code></li><li class="L2"><code class="language-python"><span class="str">        按照梯度下降，更新权重</span></code></li><li class="L3"><code class="language-python"><span class="str">        '''</span></code></li><li class="L4"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="typ">Wfh</span><span class="pln"> </span><span class="pun">-=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">learning_rate </span><span class="pun">*</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wfh_grad</span></code></li><li class="L5"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="typ">Wfx</span><span class="pln"> </span><span class="pun">-=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">learning_rate </span><span class="pun">*</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wfx_grad</span></code></li><li class="L6"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">bf </span><span class="pun">-=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">learning_rate </span><span class="pun">*</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">bf_grad</span></code></li><li class="L7"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="typ">Wih</span><span class="pln"> </span><span class="pun">-=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">learning_rate </span><span class="pun">*</span><span class="pln"> 
self</span><span class="pun">.</span><span class="typ">Wih_grad</span></code></li><li class="L8"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="typ">Wix</span><span class="pln"> </span><span class="pun">-=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">learning_rate </span><span class="pun">*</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wix_grad</span></code></li><li class="L9"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">bi </span><span class="pun">-=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">learning_rate </span><span class="pun">*</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">bi_grad</span></code></li><li class="L0"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="typ">Woh</span><span class="pln"> </span><span class="pun">-=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">learning_rate </span><span class="pun">*</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Woh_grad</span></code></li><li class="L1"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="typ">Wox</span><span class="pln"> </span><span class="pun">-=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">learning_rate </span><span class="pun">*</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wox_grad</span></code></li><li class="L2"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">bo </span><span class="pun">-=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">learning_rate </span><span class="pun">*</span><span
 class="pln"> self</span><span class="pun">.</span><span class="pln">bo_grad</span></code></li><li class="L3"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="typ">Wch</span><span class="pln"> </span><span class="pun">-=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">learning_rate </span><span class="pun">*</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wch_grad</span></code></li><li class="L4"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="typ">Wcx</span><span class="pln"> </span><span class="pun">-=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">learning_rate </span><span class="pun">*</span><span class="pln"> self</span><span class="pun">.</span><span class="typ">Wcx_grad</span></code></li><li class="L5"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">bc </span><span class="pun">-=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">learning_rate </span><span class="pun">*</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">bc_grad</span></code></li></ol></pre><div class="md-section-divider"></div><h3 data-anchor-id="7qaj" id="梯度检查的实现">梯度检查的实现</h3><p data-anchor-id="2qdg">和RecurrentLayer一样，为了支持梯度检查，我们需要支持重置内部状态：</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="1veg" style=""><ol class="linenums"><li class="L0"><code class="language-python"><span class="pln">    </span><span class="kwd">def</span><span class="pln"> reset_state</span><span class="pun">(</span><span class="pln">self</span><span class="pun">):</span></code></li><li class="L1"><code class="language-python"><span class="pln">        </span><span class="com"># 当前时刻初始化为t0</span></code></li><li class="L2"><code 
class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">times </span><span class="pun">=</span><span class="pln"> </span><span class="lit">0</span><span class="pln">       </span></code></li><li class="L3"><code class="language-python"><span class="pln">        </span><span class="com"># 各个时刻的单元状态向量c</span></code></li><li class="L4"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">c_list </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">init_state_vec</span><span class="pun">()</span></code></li><li class="L5"><code class="language-python"><span class="pln">        </span><span class="com"># 各个时刻的输出向量h</span></code></li><li class="L6"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">h_list </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">init_state_vec</span><span class="pun">()</span></code></li><li class="L7"><code class="language-python"><span class="pln">        </span><span class="com"># 各个时刻的遗忘门f</span></code></li><li class="L8"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">f_list </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">init_state_vec</span><span class="pun">()</span></code></li><li class="L9"><code class="language-python"><span class="pln">        </span><span class="com"># 各个时刻的输入门i</span></code></li><li class="L0"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">i_list </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">init_state_vec</span><span class="pun">()</span></code></li><li class="L1"><code class="language-python"><span 
class="pln">        </span><span class="com"># 各个时刻的输出门o</span></code></li><li class="L2"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">o_list </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">init_state_vec</span><span class="pun">()</span></code></li><li class="L3"><code class="language-python"><span class="pln">        </span><span class="com"># 各个时刻的即时状态c~</span></code></li><li class="L4"><code class="language-python"><span class="pln">        self</span><span class="pun">.</span><span class="pln">ct_list </span><span class="pun">=</span><span class="pln"> self</span><span class="pun">.</span><span class="pln">init_state_vec</span><span class="pun">()</span></code></li></ol></pre><p data-anchor-id="t595">最后，是梯度检查的代码：</p><div class="md-section-divider"></div><pre class="prettyprint linenums prettyprinted" data-anchor-id="sb54" style=""><ol class="linenums"><li class="L0"><code class="language-python"><span class="kwd">def</span><span class="pln"> data_set</span><span class="pun">():</span></code></li><li class="L1"><code class="language-python"><span class="pln">    x </span><span class="pun">=</span><span class="pln"> </span><span class="pun">[</span><span class="pln">np</span><span class="pun">.</span><span class="pln">array</span><span class="pun">([[</span><span class="lit">1</span><span class="pun">],</span><span class="pln"> </span><span class="pun">[</span><span class="lit">2</span><span class="pun">],</span><span class="pln"> </span><span class="pun">[</span><span class="lit">3</span><span class="pun">]]),</span></code></li><li class="L2"><code class="language-python"><span class="pln">         np</span><span class="pun">.</span><span class="pln">array</span><span class="pun">([[</span><span class="lit">2</span><span class="pun">],</span><span class="pln"> </span><span class="pun">[</span><span class="lit">3</span><span 
class="pun">],</span><span class="pln"> </span><span class="pun">[</span><span class="lit">4</span><span class="pun">]])]</span></code></li><li class="L3"><code class="language-python"><span class="pln">    d </span><span class="pun">=</span><span class="pln"> np</span><span class="pun">.</span><span class="pln">array</span><span class="pun">([[</span><span class="lit">1</span><span class="pun">],</span><span class="pln"> </span><span class="pun">[</span><span class="lit">2</span><span class="pun">]])</span></code></li><li class="L4"><code class="language-python"><span class="pln">    </span><span class="kwd">return</span><span class="pln"> x</span><span class="pun">,</span><span class="pln"> d</span></code></li><li class="L5"><code class="language-python"></code></li><li class="L6"><code class="language-python"><span class="kwd">def</span><span class="pln"> gradient_check</span><span class="pun">():</span></code></li><li class="L7"><code class="language-python"><span class="pln">    </span><span class="str">'''</span></code></li><li class="L8"><code class="language-python"><span class="str">    梯度检查</span></code></li><li class="L9"><code class="language-python"><span class="str">    '''</span></code></li><li class="L0"><code class="language-python"><span class="pln">    </span><span class="com"># 设计一个误差函数，取所有节点输出项之和</span></code></li><li class="L1"><code class="language-python"><span class="pln">    error_function </span><span class="pun">=</span><span class="pln"> </span><span class="kwd">lambda</span><span class="pln"> o</span><span class="pun">:</span><span class="pln"> o</span><span class="pun">.</span><span class="pln">sum</span><span class="pun">()</span></code></li><li class="L2"><code class="language-python"></code></li><li class="L3"><code class="language-python"><span class="pln">    lstm </span><span class="pun">=</span><span class="pln"> </span><span class="typ">LstmLayer</span><span class="pun">(</span><span class="lit">3</span><span 
class="pun">,</span><span class="pln"> </span><span class="lit">2</span><span class="pun">,</span><span class="pln"> </span><span class="lit">1e-3</span><span class="pun">)</span></code></li><li class="L4"><code class="language-python"></code></li><li class="L5"><code class="language-python"><span class="pln">    </span><span class="com"># 计算forward值</span></code></li><li class="L6"><code class="language-python"><span class="pln">    x</span><span class="pun">,</span><span class="pln"> d </span><span class="pun">=</span><span class="pln"> data_set</span><span class="pun">()</span></code></li><li class="L7"><code class="language-python"><span class="pln">    lstm</span><span class="pun">.</span><span class="pln">forward</span><span class="pun">(</span><span class="pln">x</span><span class="pun">[</span><span class="lit">0</span><span class="pun">])</span></code></li><li class="L8"><code class="language-python"><span class="pln">    lstm</span><span class="pun">.</span><span class="pln">forward</span><span class="pun">(</span><span class="pln">x</span><span class="pun">[</span><span class="lit">1</span><span class="pun">])</span></code></li><li class="L9"><code class="language-python"></code></li><li class="L0"><code class="language-python"><span class="pln">    </span><span class="com"># 求取sensitivity map</span></code></li><li class="L1"><code class="language-python"><span class="pln">    sensitivity_array </span><span class="pun">=</span><span class="pln"> np</span><span class="pun">.</span><span class="pln">ones</span><span class="pun">(</span><span class="pln">lstm</span><span class="pun">.</span><span class="pln">h_list</span><span class="pun">[-</span><span class="lit">1</span><span class="pun">].</span><span class="pln">shape</span><span class="pun">,</span></code></li><li class="L2"><code class="language-python"><span class="pln">                                dtype</span><span class="pun">=</span><span class="pln">np</span><span class="pun">.</span><span 
class="pln">float64</span><span class="pun">)</span></code></li><li class="L3"><code class="language-python"><span class="pln">    </span><span class="com"># 计算梯度</span></code></li><li class="L4"><code class="language-python"><span class="pln">    lstm</span><span class="pun">.</span><span class="pln">backward</span><span class="pun">(</span><span class="pln">x</span><span class="pun">[</span><span class="lit">1</span><span class="pun">],</span><span class="pln"> sensitivity_array</span><span class="pun">,</span><span class="pln"> </span><span class="typ">IdentityActivator</span><span class="pun">())</span></code></li><li class="L5"><code class="language-python"></code></li><li class="L6"><code class="language-python"><span class="pln">    </span><span class="com"># 检查梯度</span></code></li><li class="L7"><code class="language-python"><span class="pln">    epsilon </span><span class="pun">=</span><span class="pln"> </span><span class="lit">10e-4</span></code></li><li class="L8"><code class="language-python"><span class="pln">    </span><span class="kwd">for</span><span class="pln"> i </span><span class="kwd">in</span><span class="pln"> range</span><span class="pun">(</span><span class="pln">lstm</span><span class="pun">.</span><span class="typ">Wfh</span><span class="pun">.</span><span class="pln">shape</span><span class="pun">[</span><span class="lit">0</span><span class="pun">]):</span></code></li><li class="L9"><code class="language-python"><span class="pln">        </span><span class="kwd">for</span><span class="pln"> j </span><span class="kwd">in</span><span class="pln"> range</span><span class="pun">(</span><span class="pln">lstm</span><span class="pun">.</span><span class="typ">Wfh</span><span class="pun">.</span><span class="pln">shape</span><span class="pun">[</span><span class="lit">1</span><span class="pun">]):</span></code></li><li class="L0"><code class="language-python"><span class="pln">            lstm</span><span class="pun">.</span><span 
class="typ">Wfh</span><span class="pun">[</span><span class="pln">i</span><span class="pun">,</span><span class="pln">j</span><span class="pun">]</span><span class="pln"> </span><span class="pun">+=</span><span class="pln"> epsilon</span></code></li><li class="L1"><code class="language-python"><span class="pln">            lstm</span><span class="pun">.</span><span class="pln">reset_state</span><span class="pun">()</span></code></li><li class="L2"><code class="language-python"><span class="pln">            lstm</span><span class="pun">.</span><span class="pln">forward</span><span class="pun">(</span><span class="pln">x</span><span class="pun">[</span><span class="lit">0</span><span class="pun">])</span></code></li><li class="L3"><code class="language-python"><span class="pln">            lstm</span><span class="pun">.</span><span class="pln">forward</span><span class="pun">(</span><span class="pln">x</span><span class="pun">[</span><span class="lit">1</span><span class="pun">])</span></code></li><li class="L4"><code class="language-python"><span class="pln">            err1 </span><span class="pun">=</span><span class="pln"> error_function</span><span class="pun">(</span><span class="pln">lstm</span><span class="pun">.</span><span class="pln">h_list</span><span class="pun">[-</span><span class="lit">1</span><span class="pun">])</span></code></li><li class="L5"><code class="language-python"><span class="pln">            lstm</span><span class="pun">.</span><span class="typ">Wfh</span><span class="pun">[</span><span class="pln">i</span><span class="pun">,</span><span class="pln">j</span><span class="pun">]</span><span class="pln"> </span><span class="pun">-=</span><span class="pln"> </span><span class="lit">2</span><span class="pun">*</span><span class="pln">epsilon</span></code></li><li class="L6"><code class="language-python"><span class="pln">            lstm</span><span class="pun">.</span><span class="pln">reset_state</span><span 
class="pun">()</span></code></li><li class="L7"><code class="language-python"><span class="pln">            lstm</span><span class="pun">.</span><span class="pln">forward</span><span class="pun">(</span><span class="pln">x</span><span class="pun">[</span><span class="lit">0</span><span class="pun">])</span></code></li><li class="L8"><code class="language-python"><span class="pln">            lstm</span><span class="pun">.</span><span class="pln">forward</span><span class="pun">(</span><span class="pln">x</span><span class="pun">[</span><span class="lit">1</span><span class="pun">])</span></code></li><li class="L9"><code class="language-python"><span class="pln">            err2 </span><span class="pun">=</span><span class="pln"> error_function</span><span class="pun">(</span><span class="pln">lstm</span><span class="pun">.</span><span class="pln">h_list</span><span class="pun">[-</span><span class="lit">1</span><span class="pun">])</span></code></li><li class="L0"><code class="language-python"><span class="pln">            expect_grad </span><span class="pun">=</span><span class="pln"> </span><span class="pun">(</span><span class="pln">err1 </span><span class="pun">-</span><span class="pln"> err2</span><span class="pun">)</span><span class="pln"> </span><span class="pun">/</span><span class="pln"> </span><span class="pun">(</span><span class="lit">2</span><span class="pln"> </span><span class="pun">*</span><span class="pln"> epsilon</span><span class="pun">)</span></code></li><li class="L1"><code class="language-python"><span class="pln">            lstm</span><span class="pun">.</span><span class="typ">Wfh</span><span class="pun">[</span><span class="pln">i</span><span class="pun">,</span><span class="pln">j</span><span class="pun">]</span><span class="pln"> </span><span class="pun">+=</span><span class="pln"> epsilon</span></code></li><li class="L2"><code class="language-python"><span class="pln">            </span><span class="kwd">print</span><span class="pln"> 
</span><span class="str">'weights(%d,%d): expected - actual %.4e - %.4e'</span><span class="pln"> </span><span class="pun">%</span><span class="pln"> </span><span class="pun">(</span></code></li><li class="L3"><code class="language-python"><span class="pln">                i</span><span class="pun">,</span><span class="pln"> j</span><span class="pun">,</span><span class="pln"> expect_grad</span><span class="pun">,</span><span class="pln"> lstm</span><span class="pun">.</span><span class="typ">Wfh_grad</span><span class="pun">[</span><span class="pln">i</span><span class="pun">,</span><span class="pln">j</span><span class="pun">])</span></code></li><li class="L4"><code class="language-python"><span class="pln">    </span><span class="kwd">return</span><span class="pln"> lstm</span></code></li></ol></pre><p data-anchor-id="hvre">我们只对<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-195-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-195">W_{fh}</script>做了检查，读者可以自行增加对其他梯度的检查。下面是某次梯度检查的结果：</p><p data-anchor-id="fwnt"><img src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/2256672-cb1c4561375c22a1.png" alt="" title=""></p><div class="md-section-divider"></div><h2 data-anchor-id="uiry" id="gru">GRU</h2><p data-anchor-id="7ytc">前面我们讲了一种普通的LSTM，事实上LSTM存在很多<strong>变体</strong>，许多论文中的LSTM都或多或少的不太一样。在众多的LSTM变体中，<strong>GRU (Gated Recurrent Unit)</strong>也许是最成功的一种。它对LSTM做了很多简化，同时却保持着和LSTM相同的效果。因此，GRU最近变得越来越流行。</p><p data-anchor-id="okeu">GRU对LSTM做了两个大改动：</p><ol data-anchor-id="0843">
<li>将输入门、遗忘门、输出门变为两个门：更新门（Update Gate）<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-196-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-196">\mathbf{z}_t</script>和重置门（Reset Gate）<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-197-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-197">\mathbf{r}_t</script>。</li>
<li>将单元状态与输出合并为一个状态：<span class="MathJax_Preview"></span><span class="MathJax_SVG MathJax_SVG_Processing" id="MathJax-Element-198-Frame" role="textbox" aria-readonly="true" style="font-size: 100%; display: inline-block;"></span><script type="math/tex" id="MathJax-Element-198">\mathbf{h}</script>。</li>
</ol><p data-anchor-id="ndy6">GRU的前向计算公式为：</p><div class="md-section-divider"></div><p data-anchor-id="tb74"><span class="MathJax_Preview"></span></p><div class="MathJax_SVG_Display MathJax_SVG_Processing" role="textbox" aria-readonly="true"><span class="MathJax_SVG" id="MathJax-Element-199-Frame" style="font-size: 100%; display: inline-block;"></span></div><script type="math/tex; mode=display" id="MathJax-Element-199">
\begin{align}
\mathbf{z}_t&=\sigma(W_z\cdot[\mathbf{h}_{t-1},\mathbf{x}_t])\\
\mathbf{r}_t&=\sigma(W_r\cdot[\mathbf{h}_{t-1},\mathbf{x}_t])\\
\mathbf{\tilde{h}}_t&=\tanh(W\cdot[\mathbf{r}_t\circ\mathbf{h}_{t-1},\mathbf{x}_t])\\
\mathbf{h}_t&=(1-\mathbf{z}_t)\circ\mathbf{h}_{t-1}+\mathbf{z}_t\circ\mathbf{\tilde{h}}_t
\end{align}
</script><p></p><p data-anchor-id="ckgq">下图是GRU的示意图：</p><p data-anchor-id="1kik"><img src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/2256672-b784d887bf693253.png" alt="" title=""></p><p data-anchor-id="t8tx">GRU的训练算法比LSTM简单一些，留给读者自行推导，本文就不再赘述了。 </p><div class="md-section-divider"></div><h2 data-anchor-id="yf6b" id="小结">小结</h2><p data-anchor-id="ky0r">至此，LSTM——也许是结构最复杂的一类神经网络——就讲完了，相信拿下前几篇文章的读者们搞定这篇文章也不在话下吧！现在我们已经了解<strong>循环神经网络</strong>和它最流行的变体——<strong>LSTM</strong>，它们都可以用来处理序列。但是，有时候仅仅拥有处理序列的能力还不够，还需要处理比序列更为复杂的结构（比如树结构），这时候就需要用到另外一类网络：<strong>递归神经网络(Recursive Neural Network)</strong>，巧合的是，它的缩写也是<strong>RNN</strong>。在下一篇文章中，我们将介绍<strong>递归神经网络</strong>和它的训练算法。现在，漫长的烧脑暂告一段落，休息一下吧:)</p><p data-anchor-id="1qrg"><img src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/2256672-9ba33f65294bfb98.jpg" alt="" title=""></p><div class="md-section-divider"></div><h2 data-anchor-id="5exu" id="参考资料">参考资料</h2><ol data-anchor-id="yklw">
<li><a href="http://cs224d.stanford.edu/" target="_blank">CS224d: Deep Learning for Natural Language Processing</a></li>
<li><a href="http://colah.github.io/posts/2015-08-Understanding-LSTMs/" target="_blank">Understanding LSTM Networks</a></li>
<li><a href="http://arunmallya.github.io/writeups/nn/lstm/index.html" target="_blank">LSTM Forward and Backward Pass</a></li>
</ol></div>
    <div class="remark-icons">
    </div>
</div>

<!--in page preview buttons. -->
<div class="in-page-preview-buttons in-page-preview-buttons-full-reader">
    <ul>
        <li class="in-page-button dropdown" id="preview-toc-button" title="内容目录 Ctrl+Alt+O">
            <span class="dropdown-toggle icon-list" data-toggle="dropdown" style="color: rgba(102, 128, 153, 0.45);"></span>
            <div id="toc-list" class="dropdown-menu theme pull-right theme-white"> <!-- Add theme means this element will be changed when apply theme color. -->
                <h3>内容目录</h3>
                <hr>
                <div class="table-of-contents"><div class="toc">
<ul>
<li><a href="https://zybuluo.com/hanbingtao/note/581764#%E9%9B%B6%E5%9F%BA%E7%A1%80%E5%85%A5%E9%97%A8%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A06-%E9%95%BF%E7%9F%AD%E6%97%B6%E8%AE%B0%E5%BF%86%E7%BD%91%E7%BB%9Clstm">零基础入门深度学习(6) - 长短时记忆网络(LSTM)</a><ul>
<li><a href="https://zybuluo.com/hanbingtao/note/581764#%E6%96%87%E7%AB%A0%E5%88%97%E8%A1%A8">文章列表</a></li>
<li><a href="https://zybuluo.com/hanbingtao/note/581764#%E5%BE%80%E6%9C%9F%E5%9B%9E%E9%A1%BE">往期回顾</a></li>
<li><a href="https://zybuluo.com/hanbingtao/note/581764#%E9%95%BF%E7%9F%AD%E6%97%B6%E8%AE%B0%E5%BF%86%E7%BD%91%E7%BB%9C%E6%98%AF%E5%95%A5">长短时记忆网络是啥</a></li>
<li><a href="https://zybuluo.com/hanbingtao/note/581764#%E9%95%BF%E7%9F%AD%E6%97%B6%E8%AE%B0%E5%BF%86%E7%BD%91%E7%BB%9C%E7%9A%84%E5%89%8D%E5%90%91%E8%AE%A1%E7%AE%97">长短时记忆网络的前向计算</a></li>
<li><a href="https://zybuluo.com/hanbingtao/note/581764#%E9%95%BF%E7%9F%AD%E6%97%B6%E8%AE%B0%E5%BF%86%E7%BD%91%E7%BB%9C%E7%9A%84%E8%AE%AD%E7%BB%83">长短时记忆网络的训练</a><ul>
<li><a href="https://zybuluo.com/hanbingtao/note/581764#lstm%E8%AE%AD%E7%BB%83%E7%AE%97%E6%B3%95%E6%A1%86%E6%9E%B6">LSTM训练算法框架</a></li>
<li><a href="https://zybuluo.com/hanbingtao/note/581764#%E5%85%B3%E4%BA%8E%E5%85%AC%E5%BC%8F%E5%92%8C%E7%AC%A6%E5%8F%B7%E7%9A%84%E8%AF%B4%E6%98%8E">关于公式和符号的说明</a></li>
<li><a href="https://zybuluo.com/hanbingtao/note/581764#%E8%AF%AF%E5%B7%AE%E9%A1%B9%E6%B2%BF%E6%97%B6%E9%97%B4%E7%9A%84%E5%8F%8D%E5%90%91%E4%BC%A0%E9%80%92">误差项沿时间的反向传递</a></li>
<li><a href="https://zybuluo.com/hanbingtao/note/581764#%E5%B0%86%E8%AF%AF%E5%B7%AE%E9%A1%B9%E4%BC%A0%E9%80%92%E5%88%B0%E4%B8%8A%E4%B8%80%E5%B1%82">将误差项传递到上一层</a></li>
<li><a href="https://zybuluo.com/hanbingtao/note/581764#%E6%9D%83%E9%87%8D%E6%A2%AF%E5%BA%A6%E7%9A%84%E8%AE%A1%E7%AE%97">权重梯度的计算</a></li>
</ul>
</li>
<li><a href="https://zybuluo.com/hanbingtao/note/581764#%E9%95%BF%E7%9F%AD%E6%97%B6%E8%AE%B0%E5%BF%86%E7%BD%91%E7%BB%9C%E7%9A%84%E5%AE%9E%E7%8E%B0">长短时记忆网络的实现</a><ul>
<li><a href="https://zybuluo.com/hanbingtao/note/581764#%E6%BF%80%E6%B4%BB%E5%87%BD%E6%95%B0%E7%9A%84%E5%AE%9E%E7%8E%B0">激活函数的实现</a></li>
<li><a href="https://zybuluo.com/hanbingtao/note/581764#lstm%E5%88%9D%E5%A7%8B%E5%8C%96">LSTM初始化</a></li>
<li><a href="https://zybuluo.com/hanbingtao/note/581764#%E5%89%8D%E5%90%91%E8%AE%A1%E7%AE%97%E7%9A%84%E5%AE%9E%E7%8E%B0">前向计算的实现</a></li>
<li><a href="https://zybuluo.com/hanbingtao/note/581764#%E5%8F%8D%E5%90%91%E4%BC%A0%E6%92%AD%E7%AE%97%E6%B3%95%E7%9A%84%E5%AE%9E%E7%8E%B0">反向传播算法的实现</a></li>
<li><a href="https://zybuluo.com/hanbingtao/note/581764#%E6%A2%AF%E5%BA%A6%E4%B8%8B%E9%99%8D%E7%AE%97%E6%B3%95%E7%9A%84%E5%AE%9E%E7%8E%B0">梯度下降算法的实现</a></li>
<li><a href="https://zybuluo.com/hanbingtao/note/581764#%E6%A2%AF%E5%BA%A6%E6%A3%80%E6%9F%A5%E7%9A%84%E5%AE%9E%E7%8E%B0">梯度检查的实现</a></li>
</ul>
</li>
<li><a href="https://zybuluo.com/hanbingtao/note/581764#gru">GRU</a></li>
<li><a href="https://zybuluo.com/hanbingtao/note/581764#%E5%B0%8F%E7%BB%93">小结</a></li>
<li><a href="https://zybuluo.com/hanbingtao/note/581764#%E5%8F%82%E8%80%83%E8%B5%84%E6%96%99">参考资料</a></li>
</ul>
</li>
</ul>
</div>
</div>
            </div>
        </li>
    </ul>
</div>

<div id="reader-full-toolbar" class="reader-full-toolbar-shown" style="padding-top: 0;">
    <ul id="reader-full-toolbar-home" class="preview-button-row">
        <li class="preview-button-full-reader" id="preview-editor-button" title="撰写文本 Ctrl+Alt+M">
            <span class="icon-pencil" style="color: rgb(187, 187, 187);"></span>
        </li>
    </ul>
    <ul id="preview-button-row" class="preview-button-row">
        <li class="preview-button-full-reader dropdown" id="preview-list-button" title="文本列表 Ctrl+Alt+F">
            <span class="dropdown-toggle icon-reorder" data-toggle="dropdown" style="color: rgb(187, 187, 187);"></span>
            <ul id="file-list" class="dropdown-menu theme-black pull-right" role="menu" style="max-height: 652px;">
                    <li>
                    <ul class="tag-list">
                        <li class="tag-item item" tag-name="机器学习">
                            <span class="pull-left"><i class="icon-tag"></i><span class="tag-name">机器学习</span></span>
                            <span class="tag-count pull-right">7</span>
                            <div class="clearfix"></div>
                        </li>
                            
    <li class="file-item item" file-created-date="2017-02-28 00:59:57">
        <a tabindex="-1" href="https://zybuluo.com/hanbingtao/note/626300" title="【已发布】 2017-08-29 23:41">
        <i class="icon-share-sign"></i>
        <span id="626300">零基础入门深度学习(7) - 递归神经网络</span>
        </a>
    </li>

                            
    <li class="file-item item" file-created-date="2017-01-08 23:28:36">
        <a tabindex="-1" href="https://zybuluo.com/hanbingtao/note/581764" title="【已发布】 2017-08-28 19:55">
        <i class="icon-share-sign"></i>
        <span id="581764" class="whiter-on-black">零基础入门深度学习(6) - 长短时记忆网络(LSTM)</span>
        </a>
    </li>

                            
    <li class="file-item item" file-created-date="2016-11-05 19:16:51">
        <a tabindex="-1" href="https://zybuluo.com/hanbingtao/note/541458" title="【已发布】 2017-08-28 19:54">
        <i class="icon-share-sign"></i>
        <span id="541458">零基础入门深度学习(5) - 循环神经网络</span>
        </a>
    </li>

                            
    <li class="file-item item" file-created-date="2016-10-09 20:30:46">
        <a tabindex="-1" href="https://zybuluo.com/hanbingtao/note/485480" title="【已发布】 2017-08-28 19:53">
        <i class="icon-share-sign"></i>
        <span id="485480">零基础入门深度学习(4) - 卷积神经网络</span>
        </a>
    </li>

                            
    <li class="file-item item" file-created-date="2016-08-24 21:39:25">
        <a tabindex="-1" href="https://zybuluo.com/hanbingtao/note/476663" title="【已发布】 2017-10-17 22:25">
        <i class="icon-share-sign"></i>
        <span id="476663">零基础入门深度学习(3) - 神经网络和反向传播算法</span>
        </a>
    </li>

                            
    <li class="file-item item" file-created-date="2016-07-26 01:44:30">
        <a tabindex="-1" href="https://zybuluo.com/hanbingtao/note/448086" title="【已发布】 2017-08-28 19:40">
        <i class="icon-share-sign"></i>
        <span id="448086">零基础入门深度学习(2) - 线性单元和梯度下降</span>
        </a>
    </li>

                            
    <li class="file-item item" file-created-date="2016-07-12 11:10:42">
        <a tabindex="-1" href="https://zybuluo.com/hanbingtao/note/433855" title="【已发布】 2017-08-28 19:35">
        <i class="icon-share-sign"></i>
        <span id="433855">零基础入门深度学习(1) - 感知器</span>
        </a>
    </li>

                    </ul>
                    </li>
                    <li>
                    <ul class="tag-list">
                        <li class="tag-item item" tag-name="深度学习入门">
                            <span class="pull-left"><i class="icon-tag"></i><span class="tag-name">深度学习入门</span></span>
                            <span class="tag-count pull-right">7</span>
                            <div class="clearfix"></div>
                        </li>
                            
    <li class="file-item item" file-created-date="2017-02-28 00:59:57">
        <a tabindex="-1" href="https://zybuluo.com/hanbingtao/note/626300" title="【已发布】 2017-08-29 23:41">
        <i class="icon-share-sign"></i>
        <span id="626300">零基础入门深度学习(7) - 递归神经网络</span>
        </a>
    </li>

                            
    <li class="file-item item" file-created-date="2017-01-08 23:28:36">
        <a tabindex="-1" href="https://zybuluo.com/hanbingtao/note/581764" title="【已发布】 2017-08-28 19:55">
        <i class="icon-share-sign"></i>
        <span id="581764" class="whiter-on-black">零基础入门深度学习(6) - 长短时记忆网络(LSTM)</span>
        </a>
    </li>

                            
    <li class="file-item item" file-created-date="2016-11-05 19:16:51">
        <a tabindex="-1" href="https://zybuluo.com/hanbingtao/note/541458" title="【已发布】 2017-08-28 19:54">
        <i class="icon-share-sign"></i>
        <span id="541458">零基础入门深度学习(5) - 循环神经网络</span>
        </a>
    </li>

                            
    <li class="file-item item" file-created-date="2016-10-09 20:30:46">
        <a tabindex="-1" href="https://zybuluo.com/hanbingtao/note/485480" title="【已发布】 2017-08-28 19:53">
        <i class="icon-share-sign"></i>
        <span id="485480">零基础入门深度学习(4) - 卷积神经网络</span>
        </a>
    </li>

                            
    <li class="file-item item" file-created-date="2016-08-24 21:39:25">
        <a tabindex="-1" href="https://zybuluo.com/hanbingtao/note/476663" title="【已发布】 2017-10-17 22:25">
        <i class="icon-share-sign"></i>
        <span id="476663">零基础入门深度学习(3) - 神经网络和反向传播算法</span>
        </a>
    </li>

                            
    <li class="file-item item" file-created-date="2016-07-26 01:44:30">
        <a tabindex="-1" href="https://zybuluo.com/hanbingtao/note/448086" title="【已发布】 2017-08-28 19:40">
        <i class="icon-share-sign"></i>
        <span id="448086">零基础入门深度学习(2) - 线性单元和梯度下降</span>
        </a>
    </li>

                            
    <li class="file-item item" file-created-date="2016-07-12 11:10:42">
        <a tabindex="-1" href="https://zybuluo.com/hanbingtao/note/433855" title="【已发布】 2017-08-28 19:35">
        <i class="icon-share-sign"></i>
        <span id="433855">零基础入门深度学习(1) - 感知器</span>
        </a>
    </li>

                    </ul>
                    </li>
            </ul>
            <ul id="file-list-topbar" class="dropdown-menu theme-black pull-right" role="menu">
                <li id="search-file-bar">
                    <i class="icon-search icon-large"></i>
                    <input type="text" id="search-file-textbox" placeholder="搜索 hanbingtao 的文稿标题， * 显示全部">
                    <i class="icon-level-down icon-rotate-90 icon-large"></i>
                </li>
                <li id="tag-file-bar">
                    以下【标签】将用于标记这篇文稿：
                </li>
            </ul>
        </li>
        <li class="preview-button-full-reader" id="preview-theme-button" title="主题切换 Ctrl+Alt+Y">
            <span class="icon-adjust" style="color: rgb(187, 187, 187);"></span>
        </li>
        <li class="preview-button-full-reader" id="preview-fullscreen-button" title="全屏模式 F11">
            <span class="icon-fullscreen" style="color: rgb(187, 187, 187);"></span>
        </li>
        <li class="preview-button-full-reader wmd-spacer"></li>
        <li class="preview-button-full-reader dropdown" id="preview-about-button" title="关于本站">
            <span class="dropdown-toggle icon-info-sign" data-toggle="dropdown" data-hover="dropdown" data-delay="100" data-close-others="true" style="color: rgb(187, 187, 187);"></span>
            <ul id="about-menu" class="dropdown-menu theme-black pull-right" role="menu">
                <li title="下载全平台客户端"><a tabindex="-1" href="https://www.zybuluo.com/cmd" target="_blank"><i class="icon-laptop"></i>下载客户端</a></li>
                <li title="@ghosert"><a tabindex="-1" href="http://www.weibo.com/ghosert" target="_blank"><i class="icon-weibo"></i>关注开发者</a></li>
                <li title=""><a tabindex="-1" href="https://github.com/ghosert/cmd-editor/issues" target="_blank"><i class="icon-github-alt"></i>报告问题，建议</a></li>
                <li title="support@zybuluo.com"><a tabindex="-1" href="mailto:support@zybuluo.com" target="_blank"><i class="icon-envelope"></i>联系我们</a></li>
            </ul>
        </li>
    </ul>
</div>
<ul id="reader-full-toolbar-tail" class="reader-full-toolbar-tail-shown">
    <li class="preview-button-full-reader" id="preview-hidden-button" title="隐藏工具栏 Ctrl+Alt+I">
        <span class="icon-chevron-sign-right" style="color: rgb(187, 187, 187);"></span>
    </li>
</ul>






<!-- side remark, hidden when loading. -->
<div class="remark-list side-remark-hidden">
    <div class="remark-items">
    </div>
    <div class="leave-remark unselectable"><span class="icon-plus-sign-alt"></span><span>添加新批注</span></div>
    <div class="new-remark">
    <div class="remark-head"><a><img src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/default-head.jpg"></a></div>
    <div class="remark-author unselectable"></div>
    <div class="remark-editor" contenteditable="true" spellcheck="false"></div>
    <!-- this will be filled up by js.
    <div class="inline-error">402/400</div> for new remark
    <div class="inline-error">202/200</div> for new reply
    -->
    <div class="remark-footer unselectable">
        <button class="remark-save btn-link">保存</button>
        <button class="remark-cancel btn-link">取消</button>
    </div>

        <!-- clone the template $('.new-remark-reply').html() to here.-->
        <div class="remark-notice">在作者公开此批注前，只有你和作者可见。</div>
    </div>
</div>

<!-- template for new remark/reply -->
<div class="new-remark-reply side-remark-hidden">
    <div class="remark-head"><a><img src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/default-head.jpg"></a></div>
    <div class="remark-author unselectable"></div>
    <div class="remark-editor" contenteditable="true" spellcheck="false"></div>
    <!-- this will be filled up by js.
    <div class="inline-error">402/400</div> for new remark
    <div class="inline-error">202/200</div> for new reply
    -->
    <div class="remark-footer unselectable">
        <button class="remark-save btn-link">保存</button>
        <button class="remark-cancel btn-link">取消</button>
    </div>
</div>

<!-- template for .remark-item/.remark-reply -->
<div class="remark-item-reply side-remark-hidden">
    <div class="remark-head"><a><img src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/default-head.jpg"></a></div>
    <div class="remark-author unselectable"></div>
    <div class="remark-delete-link unselectable"><span class="icon-remove"></span></div> <!--This is mainly for deleting remark-reply, shown when author/remark hovering on remark-reply.-->
    <div class="remark-editor" contenteditable="true" spellcheck="false"></div>
    <!-- this will be filled up by js.
    <div class="inline-error">402/400</div> for new remark
    <div class="inline-error">202/200</div> for new reply
    -->
    <div class="remark-footer unselectable">
        <button class="remark-edit btn-link">修改</button>
        <button class="remark-save btn-link">保存</button>
        <button class="remark-cancel btn-link">取消</button>
        <button class="remark-delete btn-link">删除</button>
    </div>
</div>

<!-- template for remark-item-->
<div class="remark-item side-remark-hidden" data-rand-id="" data-version-id="">
    <div class="remark-published-link unselectable"><span class="icon-link icon-rotate-90"></span></div>
    <ul class="remark-options theme unselectable theme-white">
        <li class="remark-private"><span class="icon-eye-close"></span><span>私有</span></li>
        <li class="remark-public"><span class="icon-group"></span><span>公开</span></li>
        <li class="remark-delete"><span class="icon-remove"></span><span>删除</span></li>
    </ul>

    <!-- clone the template $('.remark-item-reply').html() to here.-->

    <!-- type="button": a bare <button> defaults to type="submit" -->
    <button class="remark-reply-view-more btn-link" type="button">查看更早的 5 条回复</button>
    <div class="remark-replies">
        <!--
        <div class="remark-reply">
            clone the template $('.remark-item-reply').html() to here.
        </div>
        -->
    </div>

    <div class="leave-reply unselectable"><span>回复批注</span></div>
    <div class="new-reply">
        <!-- clone the template $('.new-remark-reply').html() to here.-->
    </div>
</div>

<!-- jiawzhang NOTICE: .remark-icons will be placed into mdeditor.mako and user_note.mako, next to .wmd-preview -->
<!-- <div class="remark-icons"></div> -->

<!-- template for remark-icon -->
<div class="remark-icon unselectable side-remark-hidden remark-icon-empty" style="display: none;">
    <span class="icon-stack">
        <!-- decorative glyph; the information is the count in .remark-count -->
        <i class="glyph-comment" aria-hidden="true"></i>
        <span class="remark-count"></span>
    </span>
</div>


<!-- canvas, hidden always, this is used to convert svg to canvas and then convert canvas to png. -->
<canvas id="svg-canvas-image" class="editor-reader-hidden-always"></canvas>

<!-- This is the image panel to hold enlarged image/svg. -->
<div id="large-image-panel">
    <!-- src is injected by JS at enlarge time; alt="" keeps the empty img from
         being announced. NOTE(review): ideally the JS should copy the source
         image's alt text onto this element when it sets src - confirm in layout JS. -->
    <img id="large-image" alt="">
</div>


    


    <!-- Hidden Popup Modal: generic notify/confirm dialog shown by layout JS. -->
    <div id="notification-popup-window" class="modal hide fade theme theme-white" tabindex="-1" role="dialog" aria-labelledby="notification-title" aria-hidden="true">
        <div class="modal-header theme theme-white">
            <!-- aria-hidden removed from focusable buttons (it hides them from AT
                 but not from the keyboard); the icon-only close gets a name instead -->
            <button type="button" class="close" data-dismiss="modal" aria-label="关闭">×</button>
            <h3 id="notification-title">通知</h3>
        </div>
        <div class="modal-body theme theme-white">
            <p></p>
        </div>
        <div class="modal-footer theme theme-white">
            <!-- type="button": a bare <button> defaults to type="submit" -->
            <button id="notification-cancel" class="btn" data-dismiss="modal" type="button">取消</button>
            <button id="notification-confirm" class="btn btn-primary" type="button">确认</button>
        </div>
    </div>

    <!-- zybuluo's foot -->

    <script src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/288313bb.base.lib.min.js"></script>

    <script>
        // Server-rendered init data for the base layout scripts.
        Namespace('com.zybuluo.base');
        com.zybuluo.base.initData = {
            // Endpoint the layout JS calls to fetch site-wide prompt messages.
            globalPromptUrl: "https://zybuluo.com/global/prompt",
        };
    </script>

    
    <!--mathjax-->
    <!--blacker: 1 below means font weight.-->
    <script type="text/x-mathjax-config;executed=true">
        // MathJax config: $...$ and \( \) as inline-math delimiters with escape
        // processing, AMS automatic equation numbering, status messages suppressed,
        // and slightly bolder SVG glyph rendering (blacker: 1).
        // NOTE(review): the ";executed=true" suffix on the type attribute is a
        // save-time artifact MathJax adds after running the block; a fresh MathJax
        // load only executes blocks typed exactly "text/x-mathjax-config" - confirm
        // before reusing this saved page as a live template.
        MathJax.Hub.Config({ tex2jax: { inlineMath: [['$','$'], ["\\(","\\)"]], processEscapes: true }, TeX: { equationNumbers: { autoNumber: "AMS" } }, messageStyle: "none", SVG: { blacker: 1 }});
    </script>
    <script src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/mathJax.js"></script>
    <!--mathjax source code is here: https://github.com/mathjax/MathJax.-->
    <script src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/mathJax(1).js"></script>

    <script>
        // Server-rendered init data for the shared layout (remarks/annotations UI).
        Namespace('com.zybuluo.mdeditor.layout');
        com.zybuluo.mdeditor.layout.initData = {
            // '' means not logged in, otherwise the logged-in username. For
            // mdeditor.mako this value is reset later in render.js; for
            // user_note.mako it is rendered by the server side (as here).
            loggedInUsername: '',
            // NOTE(review): 'False'/'True' are server-templated literals baked in
            // at render time; this saved page was rendered for a non-owner viewer.
            isPageOwner: 'False' === 'True' ? true : false,
            loginComeFromUrl: 'https://zybuluo.com/login?return_to=https%3A%2F%2Fzybuluo.com%2Fhanbingtao%2Fnote%2F581764',
            // REST endpoints for listing/creating/updating/deleting/publishing
            // remarks and remark replies on note 581764.
            noteRemarksUrl: "https://zybuluo.com/note/581764/remarks", 
            newNoteRemarkUrl: "https://zybuluo.com/note/581764/remark/new", 
            updateNoteRemarkUrl: "https://zybuluo.com/note/581764/remark/update", 
            deleteNoteRemarkUrl: "https://zybuluo.com/note/581764/remark/delete", 
            publishNoteRemarkUrl: "https://zybuluo.com/note/581764/remark/publish", 
            newNoteRemarkReplyUrl: "https://zybuluo.com/note/581764/remark_reply/new", 
            updateNoteRemarkReplyUrl: "https://zybuluo.com/note/581764/remark_reply/update", 
            deleteNoteRemarkReplyUrl: "https://zybuluo.com/note/581764/remark_reply/delete", 
        };

        // BEGIN: pace.js configuration
        window.paceOptions = {
            // Show the progress bar for ajax activity only; disable the
            // document-load, element-wait, and event-lag monitors.
            ajax: true,
            document: false,
            elements: false,
            eventLag: false,
        };
        // jiawzhang NOTICE: to make sure pace.js is working for any ajax call, especially
        // jQuery ajax, add 'Pace.restart()' inside jQuery ajax calls such as '$.post'.
        // Pace 0.5.6 does not support jQuery ajax out of the box; see details in:
        // https://github.com/HubSpot/pace/issues/29
        // END: pace.js configuration

    </script>

    <script src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/7a70106e.layout.lib.min.js"></script>

    <script src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/dc648f35.layout.min.js"></script>
    <!-- Medium-editor selection toolbar (runtime-injected DOM captured at save time).
         Icon-only buttons get type="button" (a bare button defaults to submit) and
         an accessible name; the icon glyphs themselves are decorative. -->
    <div id="medium-editor-toolbar-1" class="medium-editor-toolbar">
        <ul id="medium-editor-toolbar-actions" class="medium-editor-toolbar-actions clearfix">
            <li><button class="medium-editor-button-first" data-action="remark" type="button" aria-label="批注"><i class="icon-comment" aria-hidden="true"></i></button></li>
            <li><button class="medium-editor-button-last" data-action="highlight" type="button" aria-label="高亮"><i class="icon-pencil" aria-hidden="true"></i></button></li>
        </ul>
    </div>



    

    <!-- https://zybuluo.com/static/assets/mdeditor/user_note.lib.min.js -->
    <!-- -->

    <script>
        // Server-rendered init data for the read-only note page (user_note view).
        Namespace('com.zybuluo.mdeditor.user_note');
        com.zybuluo.mdeditor.user_note.initData = {
            // NOTE(review): this is the string 'False', not a boolean - consumers
            // presumably compare it against 'True'/'False'; verify in user_note JS
            // before normalizing it to match the passwordPassed pattern below.
            isLoggedIn: 'False',
            mdeditorUrl: "https://zybuluo.com/mdeditor",
            // Server-templated literal comparison baked in at render time.
            passwordPassed: 'True' === 'True' ? true : false,
        };
    </script>

    <script src="./零基础入门深度学习(6) - 长短时记忆网络(LSTM) - 作业部落 Cmd Markdown 编辑阅读器_files/6cd3112e.user_note.min.js"></script>





    
</body></html>